Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| 11 /// \brief Implements the TargetLoweringX86Base class, which consists almost | 11 /// \brief Implements the TargetLoweringX86Base class, which consists almost |
| 12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
| 13 /// | 13 /// |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 18 | 18 |
| 19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
| 20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
| 21 #include "IceClFlags.h" | 21 #include "IceClFlags.h" |
| 22 #include "IceDefs.h" | 22 #include "IceDefs.h" |
| 23 #include "IceELFObjectWriter.h" | 23 #include "IceELFObjectWriter.h" |
| 24 #include "IceGlobalInits.h" | 24 #include "IceGlobalInits.h" |
| 25 #include "IceInstVarIter.h" | 25 #include "IceInstVarIter.h" |
| 26 #include "IceLiveness.h" | 26 #include "IceLiveness.h" |
| 27 #include "IceOperand.h" | 27 #include "IceOperand.h" |
| 28 #include "IcePhiLoweringImpl.h" | 28 #include "IcePhiLoweringImpl.h" |
| 29 #include "IceUtils.h" | 29 #include "IceUtils.h" |
| 30 #include "IceInstX86Base.h" | |
|
Jim Stichnoth
2016/01/03 18:20:02
alphabetize includes
| |
| 30 #include "llvm/Support/MathExtras.h" | 31 #include "llvm/Support/MathExtras.h" |
| 31 | 32 |
| 32 #include <stack> | 33 #include <stack> |
| 33 | 34 |
| 34 namespace Ice { | 35 namespace Ice { |
| 35 namespace X86Internal { | 36 namespace X86NAMESPACE { |
| 36 | 37 |
| 37 /// A helper class to ease the settings of RandomizationPoolingPause to disable | 38 /// A helper class to ease the settings of RandomizationPoolingPause to disable |
| 38 /// constant blinding or pooling for some translation phases. | 39 /// constant blinding or pooling for some translation phases. |
| 39 class BoolFlagSaver { | 40 class BoolFlagSaver { |
| 40 BoolFlagSaver() = delete; | 41 BoolFlagSaver() = delete; |
| 41 BoolFlagSaver(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver(const BoolFlagSaver &) = delete; |
| 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
| 43 | 44 |
| 44 public: | 45 public: |
| 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
| 46 ~BoolFlagSaver() { Flag = OldValue; } | 47 ~BoolFlagSaver() { Flag = OldValue; } |
| 47 | 48 |
| 48 private: | 49 private: |
| 49 const bool OldValue; | 50 const bool OldValue; |
| 50 bool &Flag; | 51 bool &Flag; |
| 51 }; | 52 }; |
| 52 | 53 |
| 53 template <class MachineTraits> class BoolFoldingEntry { | 54 template <typename Traits> class BoolFoldingEntry { |
| 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete; |
| 55 | 56 |
| 56 public: | 57 public: |
| 57 BoolFoldingEntry() = default; | 58 BoolFoldingEntry() = default; |
| 58 explicit BoolFoldingEntry(Inst *I); | 59 explicit BoolFoldingEntry(Inst *I); |
| 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
| 60 /// Instr is the instruction producing the i1-type variable of interest. | 61 /// Instr is the instruction producing the i1-type variable of interest. |
| 61 Inst *Instr = nullptr; | 62 Inst *Instr = nullptr; |
| 62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| 63 bool IsComplex = false; | 64 bool IsComplex = false; |
| 64 /// IsLiveOut is initialized conservatively to true, and is set to false when | 65 /// IsLiveOut is initialized conservatively to true, and is set to false when |
| 65 /// we encounter an instruction that ends Var's live range. We disable the | 66 /// we encounter an instruction that ends Var's live range. We disable the |
| 66 /// folding optimization when Var is live beyond this basic block. Note that | 67 /// folding optimization when Var is live beyond this basic block. Note that |
| 67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| 68 /// always be true and the folding optimization will never be performed. | 69 /// always be true and the folding optimization will never be performed. |
| 69 bool IsLiveOut = true; | 70 bool IsLiveOut = true; |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 71 // NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 72 // basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 73 // then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 74 uint32_t NumUses = 0; |
| 74 }; | 75 }; |
| 75 | 76 |
| 76 template <class MachineTraits> class BoolFolding { | 77 template <typename Traits> class BoolFolding { |
| 77 public: | 78 public: |
| 78 enum BoolFoldingProducerKind { | 79 enum BoolFoldingProducerKind { |
| 79 PK_None, | 80 PK_None, |
| 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | 81 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
| 81 PK_Icmp32, | 82 PK_Icmp32, |
| 82 PK_Icmp64, | 83 PK_Icmp64, |
| 83 PK_Fcmp, | 84 PK_Fcmp, |
| 84 PK_Trunc, | 85 PK_Trunc, |
| 85 PK_Arith // A flag-setting arithmetic instruction. | 86 PK_Arith // A flag-setting arithmetic instruction. |
| 86 }; | 87 }; |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 106 void dump(const Cfg *Func) const; | 107 void dump(const Cfg *Func) const; |
| 107 | 108 |
| 108 private: | 109 private: |
| 109 /// Returns true if Producers contains a valid entry for the given VarNum. | 110 /// Returns true if Producers contains a valid entry for the given VarNum. |
| 110 bool containsValid(SizeT VarNum) const { | 111 bool containsValid(SizeT VarNum) const { |
| 111 auto Element = Producers.find(VarNum); | 112 auto Element = Producers.find(VarNum); |
| 112 return Element != Producers.end() && Element->second.Instr != nullptr; | 113 return Element != Producers.end() && Element->second.Instr != nullptr; |
| 113 } | 114 } |
| 114 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | 115 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
| 115 /// Producers maps Variable::Number to a BoolFoldingEntry. | 116 /// Producers maps Variable::Number to a BoolFoldingEntry. |
| 116 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | 117 std::unordered_map<SizeT, BoolFoldingEntry<Traits>> Producers; |
| 117 }; | 118 }; |
| 118 | 119 |
| 119 template <class MachineTraits> | 120 template <typename Traits> |
| 120 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 121 BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I) |
| 121 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 122 : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {} |
| 122 | 123 |
| 123 template <class MachineTraits> | 124 template <typename Traits> |
| 124 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 125 typename BoolFolding<Traits>::BoolFoldingProducerKind |
| 125 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 126 BoolFolding<Traits>::getProducerKind(const Inst *Instr) { |
| 126 if (llvm::isa<InstIcmp>(Instr)) { | 127 if (llvm::isa<InstIcmp>(Instr)) { |
| 127 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) | 128 if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) |
| 128 return PK_Icmp32; | 129 return PK_Icmp32; |
| 129 return PK_Icmp64; | 130 return PK_Icmp64; |
| 130 } | 131 } |
| 131 if (llvm::isa<InstFcmp>(Instr)) | 132 if (llvm::isa<InstFcmp>(Instr)) |
| 132 return PK_Fcmp; | 133 return PK_Fcmp; |
| 133 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 134 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
| 134 if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { | 135 if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { |
| 135 switch (Arith->getOp()) { | 136 switch (Arith->getOp()) { |
| 136 default: | 137 default: |
| 137 return PK_None; | 138 return PK_None; |
| 138 case InstArithmetic::And: | 139 case InstArithmetic::And: |
| 139 case InstArithmetic::Or: | 140 case InstArithmetic::Or: |
| 140 return PK_Arith; | 141 return PK_Arith; |
| 141 } | 142 } |
| 142 } | 143 } |
| 143 } | 144 } |
| 144 return PK_None; // TODO(stichnot): remove this | 145 return PK_None; // TODO(stichnot): remove this |
| 145 | 146 |
| 146 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 147 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 147 switch (Cast->getCastKind()) { | 148 switch (Cast->getCastKind()) { |
| 148 default: | 149 default: |
| 149 return PK_None; | 150 return PK_None; |
| 150 case InstCast::Trunc: | 151 case InstCast::Trunc: |
| 151 return PK_Trunc; | 152 return PK_Trunc; |
| 152 } | 153 } |
| 153 } | 154 } |
| 154 return PK_None; | 155 return PK_None; |
| 155 } | 156 } |
| 156 | 157 |
| 157 template <class MachineTraits> | 158 template <typename Traits> |
| 158 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind | 159 typename BoolFolding<Traits>::BoolFoldingConsumerKind |
| 159 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { | 160 BoolFolding<Traits>::getConsumerKind(const Inst *Instr) { |
| 160 if (llvm::isa<InstBr>(Instr)) | 161 if (llvm::isa<InstBr>(Instr)) |
| 161 return CK_Br; | 162 return CK_Br; |
| 162 if (llvm::isa<InstSelect>(Instr)) | 163 if (llvm::isa<InstSelect>(Instr)) |
| 163 return CK_Select; | 164 return CK_Select; |
| 164 return CK_None; // TODO(stichnot): remove this | 165 return CK_None; // TODO(stichnot): remove this |
| 165 | 166 |
| 166 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 167 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 167 switch (Cast->getCastKind()) { | 168 switch (Cast->getCastKind()) { |
| 168 default: | 169 default: |
| 169 return CK_None; | 170 return CK_None; |
| 170 case InstCast::Sext: | 171 case InstCast::Sext: |
| 171 return CK_Sext; | 172 return CK_Sext; |
| 172 case InstCast::Zext: | 173 case InstCast::Zext: |
| 173 return CK_Zext; | 174 return CK_Zext; |
| 174 } | 175 } |
| 175 } | 176 } |
| 176 return CK_None; | 177 return CK_None; |
| 177 } | 178 } |
| 178 | 179 |
| 179 /// Returns true if the producing instruction has a "complex" lowering sequence. | 180 /// Returns true if the producing instruction has a "complex" lowering sequence. |
| 180 /// This generally means that its lowering sequence requires more than one | 181 /// This generally means that its lowering sequence requires more than one |
| 181 /// conditional branch, namely 64-bit integer compares and some floating-point | 182 /// conditional branch, namely 64-bit integer compares and some floating-point |
| 182 /// compares. When this is true, and there is more than one consumer, we prefer | 183 /// compares. When this is true, and there is more than one consumer, we prefer |
| 183 /// to disable the folding optimization because it minimizes branches. | 184 /// to disable the folding optimization because it minimizes branches. |
| 184 template <class MachineTraits> | 185 template <typename Traits> |
| 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 186 bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) { |
| 186 switch (getProducerKind(Instr)) { | 187 switch (getProducerKind(Instr)) { |
| 187 default: | 188 default: |
| 188 return false; | 189 return false; |
| 189 case PK_Icmp64: | 190 case PK_Icmp64: |
| 190 return !MachineTraits::Is64Bit; | 191 return !Traits::Is64Bit; |
| 191 case PK_Fcmp: | 192 case PK_Fcmp: |
| 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 193 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != |
| 193 .C2 != MachineTraits::Cond::Br_None; | 194 Traits::Cond::Br_None; |
| 194 } | 195 } |
| 195 } | 196 } |
| 196 | 197 |
| 197 template <class MachineTraits> | 198 template <typename Traits> |
| 198 bool BoolFolding<MachineTraits>::isValidFolding( | 199 bool BoolFolding<Traits>::isValidFolding( |
| 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, | 200 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind, |
| 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { | 201 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) { |
| 201 switch (ProducerKind) { | 202 switch (ProducerKind) { |
| 202 default: | 203 default: |
| 203 return false; | 204 return false; |
| 204 case PK_Icmp32: | 205 case PK_Icmp32: |
| 205 case PK_Icmp64: | 206 case PK_Icmp64: |
| 206 case PK_Fcmp: | 207 case PK_Fcmp: |
| 207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); | 208 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); |
| 208 case PK_Arith: | 209 case PK_Arith: |
| 209 return ConsumerKind == CK_Br; | 210 return ConsumerKind == CK_Br; |
| 210 } | 211 } |
| 211 } | 212 } |
| 212 | 213 |
| 213 template <class MachineTraits> | 214 template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) { |
| 214 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | |
| 215 Producers.clear(); | 215 Producers.clear(); |
| 216 for (Inst &Instr : Node->getInsts()) { | 216 for (Inst &Instr : Node->getInsts()) { |
| 217 // Check whether Instr is a valid producer. | 217 // Check whether Instr is a valid producer. |
| 218 Variable *Var = Instr.getDest(); | 218 Variable *Var = Instr.getDest(); |
| 219 if (!Instr.isDeleted() // only consider non-deleted instructions | 219 if (!Instr.isDeleted() // only consider non-deleted instructions |
| 220 && Var // only instructions with an actual dest var | 220 && Var // only instructions with an actual dest var |
| 221 && Var->getType() == IceType_i1 // only bool-type dest vars | 221 && Var->getType() == IceType_i1 // only bool-type dest vars |
| 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
| 223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); | 223 Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr); |
| 224 } | 224 } |
| 225 // Check each src variable against the map. | 225 // Check each src variable against the map. |
| 226 FOREACH_VAR_IN_INST(Var, Instr) { | 226 FOREACH_VAR_IN_INST(Var, Instr) { |
| 227 SizeT VarNum = Var->getIndex(); | 227 SizeT VarNum = Var->getIndex(); |
| 228 if (!containsValid(VarNum)) | 228 if (!containsValid(VarNum)) |
| 229 continue; | 229 continue; |
| 230 // All valid consumers use Var as the first source operand | 230 // All valid consumers use Var as the first source operand |
| 231 if (IndexOfVarOperandInInst(Var) != 0) { | 231 if (IndexOfVarOperandInInst(Var) != 0) { |
| 232 setInvalid(VarNum); | 232 setInvalid(VarNum); |
| 233 continue; | 233 continue; |
| 234 } | 234 } |
| 235 // Consumer instructions must be white-listed | 235 // Consumer instructions must be white-listed |
| 236 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind | 236 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind = |
| 237 ConsumerKind = getConsumerKind(&Instr); | 237 getConsumerKind(&Instr); |
| 238 if (ConsumerKind == CK_None) { | 238 if (ConsumerKind == CK_None) { |
| 239 setInvalid(VarNum); | 239 setInvalid(VarNum); |
| 240 continue; | 240 continue; |
| 241 } | 241 } |
| 242 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 242 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind = |
| 243 ProducerKind = getProducerKind(Producers[VarNum].Instr); | 243 getProducerKind(Producers[VarNum].Instr); |
| 244 if (!isValidFolding(ProducerKind, ConsumerKind)) { | 244 if (!isValidFolding(ProducerKind, ConsumerKind)) { |
| 245 setInvalid(VarNum); | 245 setInvalid(VarNum); |
| 246 continue; | 246 continue; |
| 247 } | 247 } |
| 248 // Avoid creating multiple copies of complex producer instructions. | 248 // Avoid creating multiple copies of complex producer instructions. |
| 249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { | 249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { |
| 250 setInvalid(VarNum); | 250 setInvalid(VarNum); |
| 251 continue; | 251 continue; |
| 252 } | 252 } |
| 253 ++Producers[VarNum].NumUses; | 253 ++Producers[VarNum].NumUses; |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 266 continue; | 266 continue; |
| 267 } | 267 } |
| 268 // Mark as "dead" rather than outright deleting. This is so that other | 268 // Mark as "dead" rather than outright deleting. This is so that other |
| 269 // peephole style optimizations during or before lowering have access to | 269 // peephole style optimizations during or before lowering have access to |
| 270 // this instruction in undeleted form. See for example | 270 // this instruction in undeleted form. See for example |
| 271 // tryOptimizedCmpxchgCmpBr(). | 271 // tryOptimizedCmpxchgCmpBr(). |
| 272 I.second.Instr->setDead(); | 272 I.second.Instr->setDead(); |
| 273 } | 273 } |
| 274 } | 274 } |
| 275 | 275 |
| 276 template <class MachineTraits> | 276 template <typename Traits> |
| 277 const Inst * | 277 const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const { |
| 278 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { | |
| 279 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | 278 auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
| 280 if (Var == nullptr) | 279 if (Var == nullptr) |
| 281 return nullptr; | 280 return nullptr; |
| 282 SizeT VarNum = Var->getIndex(); | 281 SizeT VarNum = Var->getIndex(); |
| 283 auto Element = Producers.find(VarNum); | 282 auto Element = Producers.find(VarNum); |
| 284 if (Element == Producers.end()) | 283 if (Element == Producers.end()) |
| 285 return nullptr; | 284 return nullptr; |
| 286 return Element->second.Instr; | 285 return Element->second.Instr; |
| 287 } | 286 } |
| 288 | 287 |
| 289 template <class MachineTraits> | 288 template <typename Traits> |
| 290 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const { | 289 void BoolFolding<Traits>::dump(const Cfg *Func) const { |
| 291 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) | 290 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) |
| 292 return; | 291 return; |
| 293 OstreamLocker L(Func->getContext()); | 292 OstreamLocker L(Func->getContext()); |
| 294 Ostream &Str = Func->getContext()->getStrDump(); | 293 Ostream &Str = Func->getContext()->getStrDump(); |
| 295 for (auto &I : Producers) { | 294 for (auto &I : Producers) { |
| 296 if (I.second.Instr == nullptr) | 295 if (I.second.Instr == nullptr) |
| 297 continue; | 296 continue; |
| 298 Str << "Found foldable producer:\n "; | 297 Str << "Found foldable producer:\n "; |
| 299 I.second.Instr->dump(Func); | 298 I.second.Instr->dump(Func); |
| 300 Str << "\n"; | 299 Str << "\n"; |
| 301 } | 300 } |
| 302 } | 301 } |
| 303 | 302 |
| 304 template <class Machine> | 303 template <typename TraitsType> |
| 305 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) { | 304 void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) { |
| 306 FoldingInfo.init(Node); | 305 FoldingInfo.init(Node); |
| 307 FoldingInfo.dump(Func); | 306 FoldingInfo.dump(Func); |
| 308 } | 307 } |
| 309 | 308 |
| 310 template <class Machine> | 309 template <typename TraitsType> |
| 311 TargetX86Base<Machine>::TargetX86Base(Cfg *Func) | 310 TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func) |
| 312 : TargetLowering(Func) { | 311 : TargetLowering(Func) { |
| 313 static_assert( | 312 static_assert( |
| 314 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == | 313 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == |
| 315 (TargetInstructionSet::X86InstructionSet_End - | 314 (TargetInstructionSet::X86InstructionSet_End - |
| 316 TargetInstructionSet::X86InstructionSet_Begin), | 315 TargetInstructionSet::X86InstructionSet_Begin), |
| 317 "Traits::InstructionSet range different from TargetInstructionSet"); | 316 "Traits::InstructionSet range different from TargetInstructionSet"); |
| 318 if (Func->getContext()->getFlags().getTargetInstructionSet() != | 317 if (Func->getContext()->getFlags().getTargetInstructionSet() != |
| 319 TargetInstructionSet::BaseInstructionSet) { | 318 TargetInstructionSet::BaseInstructionSet) { |
| 320 InstructionSet = static_cast<typename Traits::InstructionSet>( | 319 InstructionSet = static_cast<InstructionSetEnum>( |
| 321 (Func->getContext()->getFlags().getTargetInstructionSet() - | 320 (Func->getContext()->getFlags().getTargetInstructionSet() - |
| 322 TargetInstructionSet::X86InstructionSet_Begin) + | 321 TargetInstructionSet::X86InstructionSet_Begin) + |
| 323 Traits::InstructionSet::Begin); | 322 Traits::InstructionSet::Begin); |
| 324 } | 323 } |
| 325 } | 324 } |
| 326 | 325 |
| 327 template <class Machine> void TargetX86Base<Machine>::staticInit() { | 326 template <typename TraitsType> void TargetX86Base<TraitsType>::staticInit() { |
| 328 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); | 327 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); |
| 329 } | 328 } |
| 330 | 329 |
| 331 template <class Machine> void TargetX86Base<Machine>::translateO2() { | 330 template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() { |
| 332 TimerMarker T(TimerStack::TT_O2, Func); | 331 TimerMarker T(TimerStack::TT_O2, Func); |
| 333 | 332 |
| 334 genTargetHelperCalls(); | 333 genTargetHelperCalls(); |
| 335 Func->dump("After target helper call insertion"); | 334 Func->dump("After target helper call insertion"); |
| 336 | 335 |
| 337 // Merge Alloca instructions, and lay out the stack. | 336 // Merge Alloca instructions, and lay out the stack. |
| 338 static constexpr bool SortAndCombineAllocas = true; | 337 static constexpr bool SortAndCombineAllocas = true; |
| 339 Func->processAllocas(SortAndCombineAllocas); | 338 Func->processAllocas(SortAndCombineAllocas); |
| 340 Func->dump("After Alloca processing"); | 339 Func->dump("After Alloca processing"); |
| 341 | 340 |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 447 Func->dump("After branch optimization"); | 446 Func->dump("After branch optimization"); |
| 448 | 447 |
| 449 // Nop insertion if -nop-insertion is enabled. | 448 // Nop insertion if -nop-insertion is enabled. |
| 450 Func->doNopInsertion(); | 449 Func->doNopInsertion(); |
| 451 | 450 |
| 452 // Mark nodes that require sandbox alignment | 451 // Mark nodes that require sandbox alignment |
| 453 if (Ctx->getFlags().getUseSandboxing()) | 452 if (Ctx->getFlags().getUseSandboxing()) |
| 454 Func->markNodesForSandboxing(); | 453 Func->markNodesForSandboxing(); |
| 455 } | 454 } |
| 456 | 455 |
| 457 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 456 template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() { |
| 458 TimerMarker T(TimerStack::TT_Om1, Func); | 457 TimerMarker T(TimerStack::TT_Om1, Func); |
| 459 | 458 |
| 460 genTargetHelperCalls(); | 459 genTargetHelperCalls(); |
| 461 | 460 |
| 462 // Do not merge Alloca instructions, and lay out the stack. | 461 // Do not merge Alloca instructions, and lay out the stack. |
| 463 static constexpr bool SortAndCombineAllocas = false; | 462 static constexpr bool SortAndCombineAllocas = false; |
| 464 Func->processAllocas(SortAndCombineAllocas); | 463 Func->processAllocas(SortAndCombineAllocas); |
| 465 Func->dump("After Alloca processing"); | 464 Func->dump("After Alloca processing"); |
| 466 | 465 |
| 467 Func->placePhiLoads(); | 466 Func->placePhiLoads(); |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 524 case InstArithmetic::Xor: | 523 case InstArithmetic::Xor: |
| 525 return true; | 524 return true; |
| 526 case InstArithmetic::Shl: | 525 case InstArithmetic::Shl: |
| 527 case InstArithmetic::Lshr: | 526 case InstArithmetic::Lshr: |
| 528 case InstArithmetic::Ashr: | 527 case InstArithmetic::Ashr: |
| 529 return false; // TODO(stichnot): implement | 528 return false; // TODO(stichnot): implement |
| 530 return !isI64; | 529 return !isI64; |
| 531 } | 530 } |
| 532 } | 531 } |
| 533 | 532 |
| 534 template <class Machine> | 533 template <typename TraitsType> |
| 535 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { | 534 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { |
| 536 if (A == B) | 535 if (A == B) |
| 537 return true; | 536 return true; |
| 538 if (auto *MemA = llvm::dyn_cast< | 537 if (auto *MemA = |
| 539 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) { | 538 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>( |
| 540 if (auto *MemB = llvm::dyn_cast< | 539 A)) { |
| 541 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) { | 540 if (auto *MemB = |
| 541 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>( | |
| 542 B)) { | |
| 542 return MemA->getBase() == MemB->getBase() && | 543 return MemA->getBase() == MemB->getBase() && |
| 543 MemA->getOffset() == MemB->getOffset() && | 544 MemA->getOffset() == MemB->getOffset() && |
| 544 MemA->getIndex() == MemB->getIndex() && | 545 MemA->getIndex() == MemB->getIndex() && |
| 545 MemA->getShift() == MemB->getShift() && | 546 MemA->getShift() == MemB->getShift() && |
| 546 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | 547 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
| 547 } | 548 } |
| 548 } | 549 } |
| 549 return false; | 550 return false; |
| 550 } | 551 } |
| 551 | 552 |
| 552 template <class Machine> void TargetX86Base<Machine>::findRMW() { | 553 template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() { |
| 553 Func->dump("Before RMW"); | 554 Func->dump("Before RMW"); |
| 554 if (Func->isVerbose(IceV_RMW)) | 555 if (Func->isVerbose(IceV_RMW)) |
| 555 Func->getContext()->lockStr(); | 556 Func->getContext()->lockStr(); |
| 556 for (CfgNode *Node : Func->getNodes()) { | 557 for (CfgNode *Node : Func->getNodes()) { |
| 557 // Walk through the instructions, considering each sequence of 3 | 558 // Walk through the instructions, considering each sequence of 3 |
| 558 // instructions, and look for the particular RMW pattern. Note that this | 559 // instructions, and look for the particular RMW pattern. Note that this |
| 559 // search can be "broken" (false negatives) if there are intervening | 560 // search can be "broken" (false negatives) if there are intervening |
| 560 // deleted instructions, or intervening instructions that could be safely | 561 // deleted instructions, or intervening instructions that could be safely |
| 561 // moved out of the way to reveal an RMW pattern. | 562 // moved out of the way to reveal an RMW pattern. |
| 562 auto E = Node->getInsts().end(); | 563 auto E = Node->getInsts().end(); |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 596 // x's live range, and therefore the RMW instruction will be retained and | 597 // x's live range, and therefore the RMW instruction will be retained and |
| 597 // later lowered. On the other hand, if the RMW instruction does not end | 598 // later lowered. On the other hand, if the RMW instruction does not end |
| 598 // x's live range, then the Store instruction must still be present, and | 599 // x's live range, then the Store instruction must still be present, and |
| 599 // therefore the RMW instruction is ignored during lowering because it is | 600 // therefore the RMW instruction is ignored during lowering because it is |
| 600 // redundant with the Store instruction. | 601 // redundant with the Store instruction. |
| 601 // | 602 // |
| 602 // Note that if "a" has further uses, the RMW transformation may still | 603 // Note that if "a" has further uses, the RMW transformation may still |
| 603 // trigger, resulting in two loads and one store, which is worse than the | 604 // trigger, resulting in two loads and one store, which is worse than the |
| 604 // original one load and one store. However, this is probably rare, and | 605 // original one load and one store. However, this is probably rare, and |
| 605 // caching probably keeps it just as fast. | 606 // caching probably keeps it just as fast. |
| 606 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), | 607 if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(), |
| 607 Store->getAddr())) | 608 Store->getAddr())) |
| 608 continue; | 609 continue; |
| 609 Operand *ArithSrcFromLoad = Arith->getSrc(0); | 610 Operand *ArithSrcFromLoad = Arith->getSrc(0); |
| 610 Operand *ArithSrcOther = Arith->getSrc(1); | 611 Operand *ArithSrcOther = Arith->getSrc(1); |
| 611 if (ArithSrcFromLoad != Load->getDest()) { | 612 if (ArithSrcFromLoad != Load->getDest()) { |
| 612 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) | 613 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) |
| 613 continue; | 614 continue; |
| 614 std::swap(ArithSrcFromLoad, ArithSrcOther); | 615 std::swap(ArithSrcFromLoad, ArithSrcOther); |
| 615 } | 616 } |
| 616 if (Arith->getDest() != Store->getData()) | 617 if (Arith->getDest() != Store->getData()) |
| 617 continue; | 618 continue; |
| 618 if (!canRMW(Arith)) | 619 if (!canRMW(Arith)) |
| 619 continue; | 620 continue; |
| 620 if (Func->isVerbose(IceV_RMW)) { | 621 if (Func->isVerbose(IceV_RMW)) { |
| 621 Ostream &Str = Func->getContext()->getStrDump(); | 622 Ostream &Str = Func->getContext()->getStrDump(); |
| 622 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | 623 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
| 623 Load->dump(Func); | 624 Load->dump(Func); |
| 624 Str << "\n "; | 625 Str << "\n "; |
| 625 Arith->dump(Func); | 626 Arith->dump(Func); |
| 626 Str << "\n "; | 627 Str << "\n "; |
| 627 Store->dump(Func); | 628 Store->dump(Func); |
| 628 Str << "\n"; | 629 Str << "\n"; |
| 629 } | 630 } |
| 630 Variable *Beacon = Func->makeVariable(IceType_i32); | 631 Variable *Beacon = Func->makeVariable(IceType_i32); |
| 631 Beacon->setMustNotHaveReg(); | 632 Beacon->setMustNotHaveReg(); |
| 632 Store->setRmwBeacon(Beacon); | 633 Store->setRmwBeacon(Beacon); |
| 633 auto *BeaconDef = InstFakeDef::create(Func, Beacon); | 634 auto *BeaconDef = InstFakeDef::create(Func, Beacon); |
| 634 Node->getInsts().insert(I3, BeaconDef); | 635 Node->getInsts().insert(I3, BeaconDef); |
| 635 auto *RMW = Traits::Insts::FakeRMW::create( | 636 auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(), |
| 636 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | 637 Beacon, Arith->getOp()); |
| 637 Node->getInsts().insert(I3, RMW); | 638 Node->getInsts().insert(I3, RMW); |
| 638 } | 639 } |
| 639 } | 640 } |
| 640 if (Func->isVerbose(IceV_RMW)) | 641 if (Func->isVerbose(IceV_RMW)) |
| 641 Func->getContext()->unlockStr(); | 642 Func->getContext()->unlockStr(); |
| 642 } | 643 } |
| 643 | 644 |
| 644 // Converts a ConstantInteger32 operand into its constant value, or | 645 // Converts a ConstantInteger32 operand into its constant value, or |
| 645 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 646 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 646 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 647 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 659 Src0 = LoadSrc; | 660 Src0 = LoadSrc; |
| 660 return true; | 661 return true; |
| 661 } | 662 } |
| 662 if (Src0 != LoadDest && Src1 == LoadDest) { | 663 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 663 Src1 = LoadSrc; | 664 Src1 = LoadSrc; |
| 664 return true; | 665 return true; |
| 665 } | 666 } |
| 666 return false; | 667 return false; |
| 667 } | 668 } |
| 668 | 669 |
| 669 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { | 670 template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() { |
| 670 for (CfgNode *Node : Func->getNodes()) { | 671 for (CfgNode *Node : Func->getNodes()) { |
| 671 Context.init(Node); | 672 Context.init(Node); |
| 672 while (!Context.atEnd()) { | 673 while (!Context.atEnd()) { |
| 673 Variable *LoadDest = nullptr; | 674 Variable *LoadDest = nullptr; |
| 674 Operand *LoadSrc = nullptr; | 675 Operand *LoadSrc = nullptr; |
| 675 Inst *CurInst = Context.getCur(); | 676 Inst *CurInst = Context.getCur(); |
| 676 Inst *Next = Context.getNextInst(); | 677 Inst *Next = Context.getNextInst(); |
| 677 // Determine whether the current instruction is a Load instruction or | 678 // Determine whether the current instruction is a Load instruction or |
| 678 // equivalent. | 679 // equivalent. |
| 679 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 680 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 748 NewInst->spliceLivenessInfo(Next, CurInst); | 749 NewInst->spliceLivenessInfo(Next, CurInst); |
| 749 } | 750 } |
| 750 } | 751 } |
| 751 Context.advanceCur(); | 752 Context.advanceCur(); |
| 752 Context.advanceNext(); | 753 Context.advanceNext(); |
| 753 } | 754 } |
| 754 } | 755 } |
| 755 Func->dump("After load optimization"); | 756 Func->dump("After load optimization"); |
| 756 } | 757 } |
| 757 | 758 |
| 758 template <class Machine> | 759 template <typename TraitsType> |
| 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 760 bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 761 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) { |
| 761 return Br->optimizeBranch(NextNode); | 762 return Br->optimizeBranch(NextNode); |
| 762 } | 763 } |
| 763 return false; | 764 return false; |
| 764 } | 765 } |
| 765 | 766 |
| 766 template <class Machine> | 767 template <typename TraitsType> |
| 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 768 Variable *TargetX86Base<TraitsType>::getPhysicalRegister(SizeT RegNum, |
| 769 Type Ty) { | |
| 768 if (Ty == IceType_void) | 770 if (Ty == IceType_void) |
| 769 Ty = IceType_i32; | 771 Ty = IceType_i32; |
| 770 if (PhysicalRegisters[Ty].empty()) | 772 if (PhysicalRegisters[Ty].empty()) |
| 771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 773 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
| 772 assert(RegNum < PhysicalRegisters[Ty].size()); | 774 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 773 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 775 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 774 if (Reg == nullptr) { | 776 if (Reg == nullptr) { |
| 775 Reg = Func->makeVariable(Ty); | 777 Reg = Func->makeVariable(Ty); |
| 776 Reg->setRegNum(RegNum); | 778 Reg->setRegNum(RegNum); |
| 777 PhysicalRegisters[Ty][RegNum] = Reg; | 779 PhysicalRegisters[Ty][RegNum] = Reg; |
| 778 // Specially mark a named physical register as an "argument" so that it is | 780 // Specially mark a named physical register as an "argument" so that it is |
| 779 // considered live upon function entry. Otherwise it's possible to get | 781 // considered live upon function entry. Otherwise it's possible to get |
| 780 // liveness validation errors for saving callee-save registers. | 782 // liveness validation errors for saving callee-save registers. |
| 781 Func->addImplicitArg(Reg); | 783 Func->addImplicitArg(Reg); |
| 782 // Don't bother tracking the live range of a named physical register. | 784 // Don't bother tracking the live range of a named physical register. |
| 783 Reg->setIgnoreLiveness(); | 785 Reg->setIgnoreLiveness(); |
| 784 } | 786 } |
| 785 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum)); | 787 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum)); |
| 786 return Reg; | 788 return Reg; |
| 787 } | 789 } |
| 788 | 790 |
| 789 template <class Machine> | 791 template <typename TraitsType> |
| 790 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 792 IceString TargetX86Base<TraitsType>::getRegName(SizeT RegNum, Type Ty) const { |
| 791 return Traits::getRegName(Traits::getGprForType(Ty, RegNum)); | 793 return Traits::getRegName(Traits::getGprForType(Ty, RegNum)); |
| 792 } | 794 } |
| 793 | 795 |
| 794 template <class Machine> | 796 template <typename TraitsType> |
| 795 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | 797 void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const { |
| 796 if (!BuildDefs::dump()) | 798 if (!BuildDefs::dump()) |
| 797 return; | 799 return; |
| 798 Ostream &Str = Ctx->getStrEmit(); | 800 Ostream &Str = Ctx->getStrEmit(); |
| 799 if (Var->hasReg()) { | 801 if (Var->hasReg()) { |
| 800 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 802 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
| 801 return; | 803 return; |
| 802 } | 804 } |
| 803 if (Var->mustHaveReg()) { | 805 if (Var->mustHaveReg()) { |
| 804 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 806 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 805 } | 807 } |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 816 if (DecorateAsm) { | 818 if (DecorateAsm) { |
| 817 Str << Var->getSymbolicStackOffset(Func); | 819 Str << Var->getSymbolicStackOffset(Func); |
| 818 } else { | 820 } else { |
| 819 Str << Offset; | 821 Str << Offset; |
| 820 } | 822 } |
| 821 } | 823 } |
| 822 const Type FrameSPTy = Traits::WordType; | 824 const Type FrameSPTy = Traits::WordType; |
| 823 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; | 825 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; |
| 824 } | 826 } |
| 825 | 827 |
| 826 template <class Machine> | 828 template <typename TraitsType> |
| 827 typename TargetX86Base<Machine>::Traits::Address | 829 typename TargetX86Base<TraitsType>::X86Address |
| 828 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 830 TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const { |
| 829 if (Var->hasReg()) | 831 if (Var->hasReg()) |
| 830 llvm_unreachable("Stack Variable has a register assigned"); | 832 llvm_unreachable("Stack Variable has a register assigned"); |
| 831 if (Var->mustHaveReg()) { | 833 if (Var->mustHaveReg()) { |
| 832 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 834 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 833 } | 835 } |
| 834 int32_t Offset = Var->getStackOffset(); | 836 int32_t Offset = Var->getStackOffset(); |
| 835 int32_t BaseRegNum = Var->getBaseRegNum(); | 837 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 836 if (Var->getBaseRegNum() == Variable::NoRegister) | 838 if (Var->getBaseRegNum() == Variable::NoRegister) |
| 837 BaseRegNum = getFrameOrStackReg(); | 839 BaseRegNum = getFrameOrStackReg(); |
| 838 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, | 840 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset, |
| 839 AssemblerFixup::NoFixup); | 841 AssemblerFixup::NoFixup); |
| 840 } | 842 } |
| 841 | 843 |
| 842 /// Helper function for addProlog(). | 844 /// Helper function for addProlog(). |
| 843 /// | 845 /// |
| 844 /// This assumes Arg is an argument passed on the stack. This sets the frame | 846 /// This assumes Arg is an argument passed on the stack. This sets the frame |
| 845 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 847 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| 846 /// I64 arg that has been split into Lo and Hi components, it calls itself | 848 /// I64 arg that has been split into Lo and Hi components, it calls itself |
| 847 /// recursively on the components, taking care to handle Lo first because of the | 849 /// recursively on the components, taking care to handle Lo first because of the |
| 848 /// little-endian architecture. Lastly, this function generates an instruction | 850 /// little-endian architecture. Lastly, this function generates an instruction |
| 849 /// to copy Arg into its assigned register if applicable. | 851 /// to copy Arg into its assigned register if applicable. |
| 850 template <class Machine> | 852 template <typename TraitsType> |
| 851 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 853 void TargetX86Base<TraitsType>::finishArgumentLowering( |
| 852 Variable *FramePtr, | 854 Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, |
| 853 size_t BasicFrameOffset, | 855 size_t StackAdjBytes, size_t &InArgsSizeBytes) { |
| 854 size_t StackAdjBytes, | |
| 855 size_t &InArgsSizeBytes) { | |
| 856 if (!Traits::Is64Bit) { | 856 if (!Traits::Is64Bit) { |
| 857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { | 857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| 858 Variable *Lo = Arg64On32->getLo(); | 858 Variable *Lo = Arg64On32->getLo(); |
| 859 Variable *Hi = Arg64On32->getHi(); | 859 Variable *Hi = Arg64On32->getHi(); |
| 860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, | 860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, |
| 861 InArgsSizeBytes); | 861 InArgsSizeBytes); |
| 862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, | 862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, |
| 863 InArgsSizeBytes); | 863 InArgsSizeBytes); |
| 864 return; | 864 return; |
| 865 } | 865 } |
| 866 } | 866 } |
| 867 Type Ty = Arg->getType(); | 867 Type Ty = Arg->getType(); |
| 868 if (isVectorType(Ty)) { | 868 if (isVectorType(Ty)) { |
| 869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 870 } | 870 } |
| 871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 873 if (Arg->hasReg()) { | 873 if (Arg->hasReg()) { |
| 874 assert(Ty != IceType_i64 || Traits::Is64Bit); | 874 assert(Ty != IceType_i64 || Traits::Is64Bit); |
| 875 auto *Mem = Traits::X86OperandMem::create( | 875 auto *Mem = X86OperandMem::create( |
| 876 Func, Ty, FramePtr, | 876 Func, Ty, FramePtr, |
| 877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); | 877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); |
| 878 if (isVectorType(Arg->getType())) { | 878 if (isVectorType(Arg->getType())) { |
| 879 _movp(Arg, Mem); | 879 _movp(Arg, Mem); |
| 880 } else { | 880 } else { |
| 881 _mov(Arg, Mem); | 881 _mov(Arg, Mem); |
| 882 } | 882 } |
| 883 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 883 // This argument-copying instruction uses an explicit X86OperandMem |
| 884 // operand instead of a Variable, so its fill-from-stack operation has to | 884 // operand instead of a Variable, so its fill-from-stack operation has to |
| 885 // be tracked separately for statistics. | 885 // be tracked separately for statistics. |
| 886 Ctx->statsUpdateFills(); | 886 Ctx->statsUpdateFills(); |
| 887 } | 887 } |
| 888 } | 888 } |
| 889 | 889 |
| 890 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 890 template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { |
| 891 return Traits::WordType; | 891 return Traits::WordType; |
| 892 } | 892 } |
| 893 | 893 |
| 894 template <class Machine> | 894 template <typename TraitsType> |
| 895 template <typename T> | 895 template <typename T> |
| 896 typename std::enable_if<!T::Is64Bit, Operand>::type * | 896 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 897 TargetX86Base<Machine>::loOperand(Operand *Operand) { | 897 TargetX86Base<TraitsType>::loOperand(Operand *Operand) { |
| 898 assert(Operand->getType() == IceType_i64 || | 898 assert(Operand->getType() == IceType_i64 || |
| 899 Operand->getType() == IceType_f64); | 899 Operand->getType() == IceType_f64); |
| 900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 901 return Operand; | 901 return Operand; |
| 902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
| 903 return Var64On32->getLo(); | 903 return Var64On32->getLo(); |
| 904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 907 // Check if we need to blind/pool the constant. | 907 // Check if we need to blind/pool the constant. |
| 908 return legalize(ConstInt); | 908 return legalize(ConstInt); |
| 909 } | 909 } |
| 910 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 910 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) { |
| 911 auto *MemOperand = Traits::X86OperandMem::create( | 911 auto *MemOperand = X86OperandMem::create( |
| 912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 913 Mem->getShift(), Mem->getSegmentRegister()); | 913 Mem->getShift(), Mem->getSegmentRegister()); |
| 914 // Test if we should randomize or pool the offset, if so randomize it or | 914 // Test if we should randomize or pool the offset, if so randomize it or |
| 915 // pool it then create mem operand with the blinded/pooled constant. | 915 // pool it then create mem operand with the blinded/pooled constant. |
| 916 // Otherwise, return the mem operand as ordinary mem operand. | 916 // Otherwise, return the mem operand as ordinary mem operand. |
| 917 return legalize(MemOperand); | 917 return legalize(MemOperand); |
| 918 } | 918 } |
| 919 llvm_unreachable("Unsupported operand type"); | 919 llvm_unreachable("Unsupported operand type"); |
| 920 return nullptr; | 920 return nullptr; |
| 921 } | 921 } |
| 922 | 922 |
| 923 template <class Machine> | 923 template <typename TraitsType> |
| 924 template <typename T> | 924 template <typename T> |
| 925 typename std::enable_if<!T::Is64Bit, Operand>::type * | 925 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 926 TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 926 TargetX86Base<TraitsType>::hiOperand(Operand *Operand) { |
| 927 assert(Operand->getType() == IceType_i64 || | 927 assert(Operand->getType() == IceType_i64 || |
| 928 Operand->getType() == IceType_f64); | 928 Operand->getType() == IceType_f64); |
| 929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 930 return Operand; | 930 return Operand; |
| 931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
| 932 return Var64On32->getHi(); | 932 return Var64On32->getHi(); |
| 933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); | 935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); |
| 936 // Check if we need to blind/pool the constant. | 936 // Check if we need to blind/pool the constant. |
| 937 return legalize(ConstInt); | 937 return legalize(ConstInt); |
| 938 } | 938 } |
| 939 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 939 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) { |
| 940 Constant *Offset = Mem->getOffset(); | 940 Constant *Offset = Mem->getOffset(); |
| 941 if (Offset == nullptr) { | 941 if (Offset == nullptr) { |
| 942 Offset = Ctx->getConstantInt32(4); | 942 Offset = Ctx->getConstantInt32(4); |
| 943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) { | 943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) { |
| 944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); | 944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); |
| 945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) { | 945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) { |
| 946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); | 946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); |
| 947 Offset = | 947 Offset = |
| 948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), | 948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), |
| 949 SymOffset->getSuppressMangling()); | 949 SymOffset->getSuppressMangling()); |
| 950 } | 950 } |
| 951 auto *MemOperand = Traits::X86OperandMem::create( | 951 auto *MemOperand = X86OperandMem::create( |
| 952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), | 952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), |
| 953 Mem->getShift(), Mem->getSegmentRegister()); | 953 Mem->getShift(), Mem->getSegmentRegister()); |
| 954 // Test if the Offset is an eligible i32 constant for randomization and | 954 // Test if the Offset is an eligible i32 constant for randomization and |
| 955 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem | 955 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem |
| 956 // operand. | 956 // operand. |
| 957 return legalize(MemOperand); | 957 return legalize(MemOperand); |
| 958 } | 958 } |
| 959 llvm_unreachable("Unsupported operand type"); | 959 llvm_unreachable("Unsupported operand type"); |
| 960 return nullptr; | 960 return nullptr; |
| 961 } | 961 } |
| 962 | 962 |
| 963 template <class Machine> | 963 template <typename TraitsType> |
| 964 llvm::SmallBitVector | 964 llvm::SmallBitVector |
| 965 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 965 TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include, |
| 966 RegSetMask Exclude) const { | 966 RegSetMask Exclude) const { |
| 967 return Traits::getRegisterSet(Include, Exclude); | 967 return Traits::getRegisterSet(Include, Exclude); |
| 968 } | 968 } |
| 969 | 969 |
| 970 template <class Machine> | 970 template <typename TraitsType> |
| 971 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 971 void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Inst) { |
| 972 // Conservatively require the stack to be aligned. Some stack adjustment | 972 // Conservatively require the stack to be aligned. Some stack adjustment |
| 973 // operations implemented below assume that the stack is aligned before the | 973 // operations implemented below assume that the stack is aligned before the |
| 974 // alloca. All the alloca code ensures that the stack alignment is preserved | 974 // alloca. All the alloca code ensures that the stack alignment is preserved |
| 975 // after the alloca. The stack alignment restriction can be relaxed in some | 975 // after the alloca. The stack alignment restriction can be relaxed in some |
| 976 // cases. | 976 // cases. |
| 977 NeedsStackAlignment = true; | 977 NeedsStackAlignment = true; |
| 978 | 978 |
| 979 // For default align=0, set it to the real value 1, to avoid any | 979 // For default align=0, set it to the real value 1, to avoid any |
| 980 // bit-manipulation problems below. | 980 // bit-manipulation problems below. |
| 981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); | 981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1027 _mov(T, TotalSize); | 1027 _mov(T, TotalSize); |
| 1028 } | 1028 } |
| 1029 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1029 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1030 _and(T, Ctx->getConstantInt32(-Alignment)); | 1030 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1031 _sub(esp, T); | 1031 _sub(esp, T); |
| 1032 } | 1032 } |
| 1033 // Add enough to the returned address to account for the out args area. | 1033 // Add enough to the returned address to account for the out args area. |
| 1034 uint32_t OutArgsSize = maxOutArgsSizeBytes(); | 1034 uint32_t OutArgsSize = maxOutArgsSizeBytes(); |
| 1035 if (OutArgsSize > 0) { | 1035 if (OutArgsSize > 0) { |
| 1036 Variable *T = makeReg(IceType_i32); | 1036 Variable *T = makeReg(IceType_i32); |
| 1037 typename Traits::X86OperandMem *CalculateOperand = | 1037 auto *CalculateOperand = X86OperandMem::create( |
| 1038 Traits::X86OperandMem::create( | 1038 Func, IceType_i32, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
| 1039 Func, IceType_i32, esp, | |
| 1040 Ctx->getConstantInt(IceType_i32, OutArgsSize)); | |
| 1041 _lea(T, CalculateOperand); | 1039 _lea(T, CalculateOperand); |
| 1042 _mov(Dest, T); | 1040 _mov(Dest, T); |
| 1043 } else { | 1041 } else { |
| 1044 _mov(Dest, esp); | 1042 _mov(Dest, esp); |
| 1045 } | 1043 } |
| 1046 } | 1044 } |
| 1047 | 1045 |
| 1048 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1046 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
| 1049 /// narrower) for certain constants. The lea instruction can be used to multiply | 1047 /// narrower) for certain constants. The lea instruction can be used to multiply |
| 1050 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1048 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
| 1051 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1049 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
| 1052 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1050 /// lea-based multiplies by 5, combined with left-shifting by 2. |
| 1053 template <class Machine> | 1051 template <typename TraitsType> |
| 1054 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1052 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1055 int32_t Src1) { | 1053 int32_t Src1) { |
| 1056 // Disable this optimization for Om1 and O0, just to keep things simple | 1054 // Disable this optimization for Om1 and O0, just to keep things simple |
| 1057 // there. | 1055 // there. |
| 1058 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1056 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1059 return false; | 1057 return false; |
| 1060 Type Ty = Dest->getType(); | 1058 Type Ty = Dest->getType(); |
| 1061 Variable *T = nullptr; | 1059 Variable *T = nullptr; |
| 1062 if (Src1 == -1) { | 1060 if (Src1 == -1) { |
| 1063 _mov(T, Src0); | 1061 _mov(T, Src0); |
| 1064 _neg(T); | 1062 _neg(T); |
| 1065 _mov(Dest, T); | 1063 _mov(Dest, T); |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1112 return false; | 1110 return false; |
| 1113 // Limit the number of lea/shl operations for a single multiply, to a | 1111 // Limit the number of lea/shl operations for a single multiply, to a |
| 1114 // somewhat arbitrary choice of 3. | 1112 // somewhat arbitrary choice of 3. |
| 1115 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1113 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
| 1116 if (CountOps > MaxOpsForOptimizedMul) | 1114 if (CountOps > MaxOpsForOptimizedMul) |
| 1117 return false; | 1115 return false; |
| 1118 _mov(T, Src0); | 1116 _mov(T, Src0); |
| 1119 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1117 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1120 for (uint32_t i = 0; i < Count9; ++i) { | 1118 for (uint32_t i = 0; i < Count9; ++i) { |
| 1121 constexpr uint16_t Shift = 3; // log2(9-1) | 1119 constexpr uint16_t Shift = 3; // log2(9-1) |
| 1122 _lea(T, | 1120 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1123 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1124 } | 1121 } |
| 1125 for (uint32_t i = 0; i < Count5; ++i) { | 1122 for (uint32_t i = 0; i < Count5; ++i) { |
| 1126 constexpr uint16_t Shift = 2; // log2(5-1) | 1123 constexpr uint16_t Shift = 2; // log2(5-1) |
| 1127 _lea(T, | 1124 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1128 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1129 } | 1125 } |
| 1130 for (uint32_t i = 0; i < Count3; ++i) { | 1126 for (uint32_t i = 0; i < Count3; ++i) { |
| 1131 constexpr uint16_t Shift = 1; // log2(3-1) | 1127 constexpr uint16_t Shift = 1; // log2(3-1) |
| 1132 _lea(T, | 1128 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1133 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1134 } | 1129 } |
| 1135 if (Count2) { | 1130 if (Count2) { |
| 1136 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1131 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1137 } | 1132 } |
| 1138 if (Src1IsNegative) | 1133 if (Src1IsNegative) |
| 1139 _neg(T); | 1134 _neg(T); |
| 1140 _mov(Dest, T); | 1135 _mov(Dest, T); |
| 1141 return true; | 1136 return true; |
| 1142 } | 1137 } |
| 1143 | 1138 |
| 1144 template <class Machine> | 1139 template <typename TraitsType> |
| 1145 void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op, | 1140 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, |
| 1146 Operand *Src0Lo, Operand *Src0Hi, | 1141 Operand *Src0Lo, Operand *Src0Hi, |
| 1147 Operand *Src1Lo, Variable *DestLo, | 1142 Operand *Src1Lo, Variable *DestLo, |
| 1148 Variable *DestHi) { | 1143 Variable *DestHi) { |
| 1149 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | 1144 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
| 1150 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1145 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1151 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1146 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1152 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1147 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| 1153 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | 1148 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| 1154 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); | 1149 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| 1155 if (ShiftAmount > 32) { | 1150 if (ShiftAmount > 32) { |
| 1156 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); | 1151 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); |
| 1157 switch (Op) { | 1152 switch (Op) { |
| 1158 default: | 1153 default: |
| (...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1267 } | 1262 } |
| 1268 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> | 1263 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> |
| 1269 // a.lo = t2 | 1264 // a.lo = t2 |
| 1270 // a.hi = t3 | 1265 // a.hi = t3 |
| 1271 _mov(DestLo, T_2); | 1266 _mov(DestLo, T_2); |
| 1272 _mov(DestHi, T_3); | 1267 _mov(DestHi, T_3); |
| 1273 } | 1268 } |
| 1274 } else { | 1269 } else { |
| 1275 // NON-CONSTANT CASES. | 1270 // NON-CONSTANT CASES. |
| 1276 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1271 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1277 typename Traits::Insts::Label *Label = | 1272 InstX86Label *Label = InstX86Label::create(Func, this); |
| 1278 Traits::Insts::Label::create(Func, this); | |
| 1279 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1273 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
| 1280 // t1:ecx = c.lo & 0xff | 1274 // t1:ecx = c.lo & 0xff |
| 1281 // t2 = b.lo | 1275 // t2 = b.lo |
| 1282 // t3 = b.hi | 1276 // t3 = b.hi |
| 1283 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); | 1277 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); |
| 1284 _mov(T_2, Src0Lo); | 1278 _mov(T_2, Src0Lo); |
| 1285 _mov(T_3, Src0Hi); | 1279 _mov(T_3, Src0Hi); |
| 1286 switch (Op) { | 1280 switch (Op) { |
| 1287 default: | 1281 default: |
| 1288 assert(0 && "non-shift op"); | 1282 assert(0 && "non-shift op"); |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1348 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> | 1342 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> |
| 1349 // L1: | 1343 // L1: |
| 1350 // a.lo = t2 | 1344 // a.lo = t2 |
| 1351 // a.hi = t3 | 1345 // a.hi = t3 |
| 1352 Context.insert(Label); | 1346 Context.insert(Label); |
| 1353 _mov(DestLo, T_2); | 1347 _mov(DestLo, T_2); |
| 1354 _mov(DestHi, T_3); | 1348 _mov(DestHi, T_3); |
| 1355 } | 1349 } |
| 1356 } | 1350 } |
| 1357 | 1351 |
| 1358 template <class Machine> | 1352 template <typename TraitsType> |
| 1359 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1353 void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { |
| 1360 Variable *Dest = Inst->getDest(); | 1354 Variable *Dest = Inst->getDest(); |
| 1361 if (Dest->isRematerializable()) { | 1355 if (Dest->isRematerializable()) { |
| 1362 Context.insert<InstFakeDef>(Dest); | 1356 Context.insert<InstFakeDef>(Dest); |
| 1363 return; | 1357 return; |
| 1364 } | 1358 } |
| 1365 Type Ty = Dest->getType(); | 1359 Type Ty = Dest->getType(); |
| 1366 Operand *Src0 = legalize(Inst->getSrc(0)); | 1360 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1367 Operand *Src1 = legalize(Inst->getSrc(1)); | 1361 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1368 if (Inst->isCommutative()) { | 1362 if (Inst->isCommutative()) { |
| 1369 uint32_t SwapCount = 0; | 1363 uint32_t SwapCount = 0; |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1501 case InstArithmetic::Srem: | 1495 case InstArithmetic::Srem: |
| 1502 llvm_unreachable("Call-helper-involved instruction for i64 type \ | 1496 llvm_unreachable("Call-helper-involved instruction for i64 type \ |
| 1503 should have already been handled before"); | 1497 should have already been handled before"); |
| 1504 break; | 1498 break; |
| 1505 } | 1499 } |
| 1506 return; | 1500 return; |
| 1507 } | 1501 } |
| 1508 if (isVectorType(Ty)) { | 1502 if (isVectorType(Ty)) { |
| 1509 // TODO: Trap on integer divide and integer modulo by zero. See: | 1503 // TODO: Trap on integer divide and integer modulo by zero. See: |
| 1510 // https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1504 // https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| 1511 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) | 1505 if (llvm::isa<X86OperandMem>(Src1)) |
| 1512 Src1 = legalizeToReg(Src1); | 1506 Src1 = legalizeToReg(Src1); |
| 1513 switch (Inst->getOp()) { | 1507 switch (Inst->getOp()) { |
| 1514 case InstArithmetic::_num: | 1508 case InstArithmetic::_num: |
| 1515 llvm_unreachable("Unknown arithmetic operator"); | 1509 llvm_unreachable("Unknown arithmetic operator"); |
| 1516 break; | 1510 break; |
| 1517 case InstArithmetic::Add: { | 1511 case InstArithmetic::Add: { |
| 1518 Variable *T = makeReg(Ty); | 1512 Variable *T = makeReg(Ty); |
| 1519 _movp(T, Src0); | 1513 _movp(T, Src0); |
| 1520 _padd(T, Src1); | 1514 _padd(T, Src1); |
| 1521 _movp(Dest, T); | 1515 _movp(Dest, T); |
| (...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1920 _mov(T, Src0); | 1914 _mov(T, Src0); |
| 1921 _divss(T, Src1); | 1915 _divss(T, Src1); |
| 1922 _mov(Dest, T); | 1916 _mov(Dest, T); |
| 1923 break; | 1917 break; |
| 1924 case InstArithmetic::Frem: | 1918 case InstArithmetic::Frem: |
| 1925 llvm::report_fatal_error("Helper call was expected"); | 1919 llvm::report_fatal_error("Helper call was expected"); |
| 1926 break; | 1920 break; |
| 1927 } | 1921 } |
| 1928 } | 1922 } |
| 1929 | 1923 |
| 1930 template <class Machine> | 1924 template <typename TraitsType> |
| 1931 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1925 void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Inst) { |
| 1932 Variable *Dest = Inst->getDest(); | 1926 Variable *Dest = Inst->getDest(); |
| 1933 if (Dest->isRematerializable()) { | 1927 if (Dest->isRematerializable()) { |
| 1934 Context.insert<InstFakeDef>(Dest); | 1928 Context.insert<InstFakeDef>(Dest); |
| 1935 return; | 1929 return; |
| 1936 } | 1930 } |
| 1937 Operand *Src = Inst->getSrc(0); | 1931 Operand *Src = Inst->getSrc(0); |
| 1938 assert(Dest->getType() == Src->getType()); | 1932 assert(Dest->getType() == Src->getType()); |
| 1939 lowerMove(Dest, Src, false); | 1933 lowerMove(Dest, Src, false); |
| 1940 } | 1934 } |
| 1941 | 1935 |
| 1942 template <class Machine> | 1936 template <typename TraitsType> |
| 1943 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { | 1937 void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) { |
| 1944 if (Br->isUnconditional()) { | 1938 if (Br->isUnconditional()) { |
| 1945 _br(Br->getTargetUnconditional()); | 1939 _br(Br->getTargetUnconditional()); |
| 1946 return; | 1940 return; |
| 1947 } | 1941 } |
| 1948 Operand *Cond = Br->getCondition(); | 1942 Operand *Cond = Br->getCondition(); |
| 1949 | 1943 |
| 1950 // Handle folding opportunities. | 1944 // Handle folding opportunities. |
| 1951 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 1945 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| 1952 assert(Producer->isDeleted()); | 1946 assert(Producer->isDeleted()); |
| 1953 switch (BoolFolding::getProducerKind(Producer)) { | 1947 switch (BoolFolding::getProducerKind(Producer)) { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 1967 return; | 1961 return; |
| 1968 } | 1962 } |
| 1969 } | 1963 } |
| 1970 } | 1964 } |
| 1971 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 1965 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 1972 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1966 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1973 _cmp(Src0, Zero); | 1967 _cmp(Src0, Zero); |
| 1974 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 1968 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 1975 } | 1969 } |
| 1976 | 1970 |
| 1977 template <class Machine> | 1971 template <typename TraitsType> |
| 1978 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 1972 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { |
| 1979 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1973 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 1980 InstCast::OpKind CastKind = Inst->getCastKind(); | 1974 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 1981 Variable *Dest = Inst->getDest(); | 1975 Variable *Dest = Inst->getDest(); |
| 1982 Type DestTy = Dest->getType(); | 1976 Type DestTy = Dest->getType(); |
| 1983 switch (CastKind) { | 1977 switch (CastKind) { |
| 1984 default: | 1978 default: |
| 1985 Func->setError("Cast type not supported"); | 1979 Func->setError("Cast type not supported"); |
| 1986 return; | 1980 return; |
| 1987 case InstCast::Sext: { | 1981 case InstCast::Sext: { |
| 1988 // Src0RM is the source operand legalized to physical register or memory, | 1982 // Src0RM is the source operand legalized to physical register or memory, |
| (...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2146 Variable *T = makeReg(DestTy); | 2140 Variable *T = makeReg(DestTy); |
| 2147 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); | 2141 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); |
| 2148 _mov(Dest, T); | 2142 _mov(Dest, T); |
| 2149 break; | 2143 break; |
| 2150 } | 2144 } |
| 2151 case InstCast::Fptosi: | 2145 case InstCast::Fptosi: |
| 2152 if (isVectorType(DestTy)) { | 2146 if (isVectorType(DestTy)) { |
| 2153 assert(DestTy == IceType_v4i32 && | 2147 assert(DestTy == IceType_v4i32 && |
| 2154 Inst->getSrc(0)->getType() == IceType_v4f32); | 2148 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2155 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2149 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2156 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2150 if (llvm::isa<X86OperandMem>(Src0RM)) |
| 2157 Src0RM = legalizeToReg(Src0RM); | 2151 Src0RM = legalizeToReg(Src0RM); |
| 2158 Variable *T = makeReg(DestTy); | 2152 Variable *T = makeReg(DestTy); |
| 2159 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2153 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2160 _movp(Dest, T); | 2154 _movp(Dest, T); |
| 2161 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { | 2155 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2162 llvm::report_fatal_error("Helper call was expected"); | 2156 llvm::report_fatal_error("Helper call was expected"); |
| 2163 } else { | 2157 } else { |
| 2164 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2158 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2165 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2159 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2166 Variable *T_1 = nullptr; | 2160 Variable *T_1 = nullptr; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2212 if (DestTy == IceType_i1) | 2206 if (DestTy == IceType_i1) |
| 2213 _and(T_2, Ctx->getConstantInt1(1)); | 2207 _and(T_2, Ctx->getConstantInt1(1)); |
| 2214 _mov(Dest, T_2); | 2208 _mov(Dest, T_2); |
| 2215 } | 2209 } |
| 2216 break; | 2210 break; |
| 2217 case InstCast::Sitofp: | 2211 case InstCast::Sitofp: |
| 2218 if (isVectorType(DestTy)) { | 2212 if (isVectorType(DestTy)) { |
| 2219 assert(DestTy == IceType_v4f32 && | 2213 assert(DestTy == IceType_v4f32 && |
| 2220 Inst->getSrc(0)->getType() == IceType_v4i32); | 2214 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2221 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2215 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2222 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2216 if (llvm::isa<X86OperandMem>(Src0RM)) |
| 2223 Src0RM = legalizeToReg(Src0RM); | 2217 Src0RM = legalizeToReg(Src0RM); |
| 2224 Variable *T = makeReg(DestTy); | 2218 Variable *T = makeReg(DestTy); |
| 2225 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2219 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2226 _movp(Dest, T); | 2220 _movp(Dest, T); |
| 2227 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2221 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2228 llvm::report_fatal_error("Helper call was expected"); | 2222 llvm::report_fatal_error("Helper call was expected"); |
| 2229 } else { | 2223 } else { |
| 2230 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2224 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2231 // Sign-extend the operand. | 2225 // Sign-extend the operand. |
| 2232 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2226 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2297 Type SrcType = Src0RM->getType(); | 2291 Type SrcType = Src0RM->getType(); |
| 2298 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || | 2292 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || |
| 2299 (DestTy == IceType_f32 && SrcType == IceType_i32)); | 2293 (DestTy == IceType_f32 && SrcType == IceType_i32)); |
| 2300 // a.i32 = bitcast b.f32 ==> | 2294 // a.i32 = bitcast b.f32 ==> |
| 2301 // t.f32 = b.f32 | 2295 // t.f32 = b.f32 |
| 2302 // s.f32 = spill t.f32 | 2296 // s.f32 = spill t.f32 |
| 2303 // a.i32 = s.f32 | 2297 // a.i32 = s.f32 |
| 2304 Variable *T = nullptr; | 2298 Variable *T = nullptr; |
| 2305 // TODO: Should be able to force a spill setup by calling legalize() with | 2299 // TODO: Should be able to force a spill setup by calling legalize() with |
| 2306 // Legal_Mem and not Legal_Reg or Legal_Imm. | 2300 // Legal_Mem and not Legal_Reg or Legal_Imm. |
| 2307 typename Traits::SpillVariable *SpillVar = | 2301 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType); |
| 2308 Func->makeVariable<typename Traits::SpillVariable>(SrcType); | |
| 2309 SpillVar->setLinkedTo(Dest); | 2302 SpillVar->setLinkedTo(Dest); |
| 2310 Variable *Spill = SpillVar; | 2303 Variable *Spill = SpillVar; |
| 2311 Spill->setMustNotHaveReg(); | 2304 Spill->setMustNotHaveReg(); |
| 2312 _mov(T, Src0RM); | 2305 _mov(T, Src0RM); |
| 2313 _mov(Spill, T); | 2306 _mov(Spill, T); |
| 2314 _mov(Dest, Spill); | 2307 _mov(Dest, Spill); |
| 2315 } break; | 2308 } break; |
| 2316 case IceType_i64: { | 2309 case IceType_i64: { |
| 2317 assert(Src0->getType() == IceType_f64); | 2310 assert(Src0->getType() == IceType_f64); |
| 2318 if (Traits::Is64Bit) { | 2311 if (Traits::Is64Bit) { |
| 2319 Variable *Src0R = legalizeToReg(Src0); | 2312 Variable *Src0R = legalizeToReg(Src0); |
| 2320 Variable *T = makeReg(IceType_i64); | 2313 Variable *T = makeReg(IceType_i64); |
| 2321 _movd(T, Src0R); | 2314 _movd(T, Src0R); |
| 2322 _mov(Dest, T); | 2315 _mov(Dest, T); |
| 2323 } else { | 2316 } else { |
| 2324 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2317 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2325 // a.i64 = bitcast b.f64 ==> | 2318 // a.i64 = bitcast b.f64 ==> |
| 2326 // s.f64 = spill b.f64 | 2319 // s.f64 = spill b.f64 |
| 2327 // t_lo.i32 = lo(s.f64) | 2320 // t_lo.i32 = lo(s.f64) |
| 2328 // a_lo.i32 = t_lo.i32 | 2321 // a_lo.i32 = t_lo.i32 |
| 2329 // t_hi.i32 = hi(s.f64) | 2322 // t_hi.i32 = hi(s.f64) |
| 2330 // a_hi.i32 = t_hi.i32 | 2323 // a_hi.i32 = t_hi.i32 |
| 2331 Operand *SpillLo, *SpillHi; | 2324 Operand *SpillLo, *SpillHi; |
| 2332 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | 2325 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { |
| 2333 typename Traits::SpillVariable *SpillVar = | 2326 SpillVariable *SpillVar = |
| 2334 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2327 Func->makeVariable<SpillVariable>(IceType_f64); |
| 2335 SpillVar->setLinkedTo(Src0Var); | 2328 SpillVar->setLinkedTo(Src0Var); |
| 2336 Variable *Spill = SpillVar; | 2329 Variable *Spill = SpillVar; |
| 2337 Spill->setMustNotHaveReg(); | 2330 Spill->setMustNotHaveReg(); |
| 2338 _movq(Spill, Src0RM); | 2331 _movq(Spill, Src0RM); |
| 2339 SpillLo = Traits::VariableSplit::create(Func, Spill, | 2332 SpillLo = Traits::VariableSplit::create(Func, Spill, |
| 2340 Traits::VariableSplit::Low); | 2333 Traits::VariableSplit::Low); |
| 2341 SpillHi = Traits::VariableSplit::create(Func, Spill, | 2334 SpillHi = Traits::VariableSplit::create(Func, Spill, |
| 2342 Traits::VariableSplit::High); | 2335 Traits::VariableSplit::High); |
| 2343 } else { | 2336 } else { |
| 2344 SpillLo = loOperand(Src0RM); | 2337 SpillLo = loOperand(Src0RM); |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 2358 } break; | 2351 } break; |
| 2359 case IceType_f64: { | 2352 case IceType_f64: { |
| 2360 assert(Src0->getType() == IceType_i64); | 2353 assert(Src0->getType() == IceType_i64); |
| 2361 if (Traits::Is64Bit) { | 2354 if (Traits::Is64Bit) { |
| 2362 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2355 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2363 Variable *T = makeReg(IceType_f64); | 2356 Variable *T = makeReg(IceType_f64); |
| 2364 _movd(T, Src0RM); | 2357 _movd(T, Src0RM); |
| 2365 _mov(Dest, T); | 2358 _mov(Dest, T); |
| 2366 } else { | 2359 } else { |
| 2367 Src0 = legalize(Src0); | 2360 Src0 = legalize(Src0); |
| 2368 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | 2361 if (llvm::isa<X86OperandMem>(Src0)) { |
| 2369 Variable *T = Func->makeVariable(DestTy); | 2362 Variable *T = Func->makeVariable(DestTy); |
| 2370 _movq(T, Src0); | 2363 _movq(T, Src0); |
| 2371 _movq(Dest, T); | 2364 _movq(Dest, T); |
| 2372 break; | 2365 break; |
| 2373 } | 2366 } |
| 2374 // a.f64 = bitcast b.i64 ==> | 2367 // a.f64 = bitcast b.i64 ==> |
| 2375 // t_lo.i32 = b_lo.i32 | 2368 // t_lo.i32 = b_lo.i32 |
| 2376 // FakeDef(s.f64) | 2369 // FakeDef(s.f64) |
| 2377 // lo(s.f64) = t_lo.i32 | 2370 // lo(s.f64) = t_lo.i32 |
| 2378 // t_hi.i32 = b_hi.i32 | 2371 // t_hi.i32 = b_hi.i32 |
| 2379 // hi(s.f64) = t_hi.i32 | 2372 // hi(s.f64) = t_hi.i32 |
| 2380 // a.f64 = s.f64 | 2373 // a.f64 = s.f64 |
| 2381 typename Traits::SpillVariable *SpillVar = | 2374 SpillVariable *SpillVar = |
| 2382 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2375 Func->makeVariable<SpillVariable>(IceType_f64); |
| 2383 SpillVar->setLinkedTo(Dest); | 2376 SpillVar->setLinkedTo(Dest); |
| 2384 Variable *Spill = SpillVar; | 2377 Variable *Spill = SpillVar; |
| 2385 Spill->setMustNotHaveReg(); | 2378 Spill->setMustNotHaveReg(); |
| 2386 | 2379 |
| 2387 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2380 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 2388 auto *SpillLo = Traits::VariableSplit::create( | 2381 auto *SpillLo = Traits::VariableSplit::create( |
| 2389 Func, Spill, Traits::VariableSplit::Low); | 2382 Func, Spill, Traits::VariableSplit::Low); |
| 2390 auto *SpillHi = Traits::VariableSplit::create( | 2383 auto *SpillHi = Traits::VariableSplit::create( |
| 2391 Func, Spill, Traits::VariableSplit::High); | 2384 Func, Spill, Traits::VariableSplit::High); |
| 2392 _mov(T_Lo, loOperand(Src0)); | 2385 _mov(T_Lo, loOperand(Src0)); |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 2411 case IceType_v4i32: | 2404 case IceType_v4i32: |
| 2412 case IceType_v4f32: { | 2405 case IceType_v4f32: { |
| 2413 _movp(Dest, legalizeToReg(Src0)); | 2406 _movp(Dest, legalizeToReg(Src0)); |
| 2414 } break; | 2407 } break; |
| 2415 } | 2408 } |
| 2416 break; | 2409 break; |
| 2417 } | 2410 } |
| 2418 } | 2411 } |
| 2419 } | 2412 } |
| 2420 | 2413 |
| 2421 template <class Machine> | 2414 template <typename TraitsType> |
| 2422 void TargetX86Base<Machine>::lowerExtractElement( | 2415 void TargetX86Base<TraitsType>::lowerExtractElement( |
| 2423 const InstExtractElement *Inst) { | 2416 const InstExtractElement *Inst) { |
| 2424 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2417 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2425 ConstantInteger32 *ElementIndex = | 2418 ConstantInteger32 *ElementIndex = |
| 2426 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2419 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
| 2427 // Only constant indices are allowed in PNaCl IR. | 2420 // Only constant indices are allowed in PNaCl IR. |
| 2428 assert(ElementIndex); | 2421 assert(ElementIndex); |
| 2429 | 2422 |
| 2430 unsigned Index = ElementIndex->getValue(); | 2423 unsigned Index = ElementIndex->getValue(); |
| 2431 Type Ty = SourceVectNotLegalized->getType(); | 2424 Type Ty = SourceVectNotLegalized->getType(); |
| 2432 Type ElementTy = typeElementType(Ty); | 2425 Type ElementTy = typeElementType(Ty); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2474 // Spill the value to a stack slot and do the extraction in memory. | 2467 // Spill the value to a stack slot and do the extraction in memory. |
| 2475 // | 2468 // |
| 2476 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 2469 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
| 2477 // for legalizing to mem is implemented. | 2470 // for legalizing to mem is implemented. |
| 2478 Variable *Slot = Func->makeVariable(Ty); | 2471 Variable *Slot = Func->makeVariable(Ty); |
| 2479 Slot->setMustNotHaveReg(); | 2472 Slot->setMustNotHaveReg(); |
| 2480 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 2473 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
| 2481 | 2474 |
| 2482 // Compute the location of the element in memory. | 2475 // Compute the location of the element in memory. |
| 2483 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 2476 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
| 2484 typename Traits::X86OperandMem *Loc = | 2477 X86OperandMem *Loc = |
| 2485 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 2478 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 2486 _mov(ExtractedElementR, Loc); | 2479 _mov(ExtractedElementR, Loc); |
| 2487 } | 2480 } |
| 2488 | 2481 |
| 2489 if (ElementTy == IceType_i1) { | 2482 if (ElementTy == IceType_i1) { |
| 2490 // Truncate extracted integers to i1s if necessary. | 2483 // Truncate extracted integers to i1s if necessary. |
| 2491 Variable *T = makeReg(IceType_i1); | 2484 Variable *T = makeReg(IceType_i1); |
| 2492 InstCast *Cast = | 2485 InstCast *Cast = |
| 2493 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); | 2486 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); |
| 2494 lowerCast(Cast); | 2487 lowerCast(Cast); |
| 2495 ExtractedElementR = T; | 2488 ExtractedElementR = T; |
| 2496 } | 2489 } |
| 2497 | 2490 |
| 2498 // Copy the element to the destination. | 2491 // Copy the element to the destination. |
| 2499 Variable *Dest = Inst->getDest(); | 2492 Variable *Dest = Inst->getDest(); |
| 2500 _mov(Dest, ExtractedElementR); | 2493 _mov(Dest, ExtractedElementR); |
| 2501 } | 2494 } |
| 2502 | 2495 |
| 2503 template <class Machine> | 2496 template <typename TraitsType> |
| 2504 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) { | 2497 void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) { |
| 2505 Variable *Dest = Fcmp->getDest(); | 2498 Variable *Dest = Fcmp->getDest(); |
| 2506 | 2499 |
| 2507 if (isVectorType(Dest->getType())) { | 2500 if (isVectorType(Dest->getType())) { |
| 2508 lowerFcmpVector(Fcmp); | 2501 lowerFcmpVector(Fcmp); |
| 2509 } else { | 2502 } else { |
| 2510 constexpr Inst *Consumer = nullptr; | 2503 constexpr Inst *Consumer = nullptr; |
| 2511 lowerFcmpAndConsumer(Fcmp, Consumer); | 2504 lowerFcmpAndConsumer(Fcmp, Consumer); |
| 2512 } | 2505 } |
| 2513 } | 2506 } |
| 2514 | 2507 |
| 2515 template <class Machine> | 2508 template <typename TraitsType> |
| 2516 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, | 2509 void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, |
| 2517 const Inst *Consumer) { | 2510 const Inst *Consumer) { |
| 2518 Operand *Src0 = Fcmp->getSrc(0); | 2511 Operand *Src0 = Fcmp->getSrc(0); |
| 2519 Operand *Src1 = Fcmp->getSrc(1); | 2512 Operand *Src1 = Fcmp->getSrc(1); |
| 2520 Variable *Dest = Fcmp->getDest(); | 2513 Variable *Dest = Fcmp->getDest(); |
| 2521 | 2514 |
| 2522 if (isVectorType(Dest->getType())) | 2515 if (isVectorType(Dest->getType())) |
| 2523 llvm::report_fatal_error("Vector compare/branch cannot be folded"); | 2516 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| 2524 | 2517 |
| 2525 if (Consumer != nullptr) { | 2518 if (Consumer != nullptr) { |
| 2526 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2519 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2527 if (lowerOptimizeFcmpSelect(Fcmp, Select)) | 2520 if (lowerOptimizeFcmpSelect(Fcmp, Select)) |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2559 assert(Traits::TableFcmp[Index].Default); | 2552 assert(Traits::TableFcmp[Index].Default); |
| 2560 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); | 2553 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); |
| 2561 return; | 2554 return; |
| 2562 } | 2555 } |
| 2563 } | 2556 } |
| 2564 int32_t IntDefault = Traits::TableFcmp[Index].Default; | 2557 int32_t IntDefault = Traits::TableFcmp[Index].Default; |
| 2565 if (Consumer == nullptr) { | 2558 if (Consumer == nullptr) { |
| 2566 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); | 2559 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); |
| 2567 _mov(Dest, Default); | 2560 _mov(Dest, Default); |
| 2568 if (HasC1) { | 2561 if (HasC1) { |
| 2569 typename Traits::Insts::Label *Label = | 2562 InstX86Label *Label = InstX86Label::create(Func, this); |
| 2570 Traits::Insts::Label::create(Func, this); | |
| 2571 _br(Traits::TableFcmp[Index].C1, Label); | 2563 _br(Traits::TableFcmp[Index].C1, Label); |
| 2572 if (HasC2) { | 2564 if (HasC2) { |
| 2573 _br(Traits::TableFcmp[Index].C2, Label); | 2565 _br(Traits::TableFcmp[Index].C2, Label); |
| 2574 } | 2566 } |
| 2575 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); | 2567 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); |
| 2576 _redefined(_mov(Dest, NonDefault)); | 2568 _redefined(_mov(Dest, NonDefault)); |
| 2577 Context.insert(Label); | 2569 Context.insert(Label); |
| 2578 } | 2570 } |
| 2579 return; | 2571 return; |
| 2580 } | 2572 } |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 2595 return; | 2587 return; |
| 2596 } | 2588 } |
| 2597 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2589 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2598 Operand *SrcT = Select->getTrueOperand(); | 2590 Operand *SrcT = Select->getTrueOperand(); |
| 2599 Operand *SrcF = Select->getFalseOperand(); | 2591 Operand *SrcF = Select->getFalseOperand(); |
| 2600 Variable *SelectDest = Select->getDest(); | 2592 Variable *SelectDest = Select->getDest(); |
| 2601 if (IntDefault != 0) | 2593 if (IntDefault != 0) |
| 2602 std::swap(SrcT, SrcF); | 2594 std::swap(SrcT, SrcF); |
| 2603 lowerMove(SelectDest, SrcF, false); | 2595 lowerMove(SelectDest, SrcF, false); |
| 2604 if (HasC1) { | 2596 if (HasC1) { |
| 2605 typename Traits::Insts::Label *Label = | 2597 InstX86Label *Label = InstX86Label::create(Func, this); |
| 2606 Traits::Insts::Label::create(Func, this); | |
| 2607 _br(Traits::TableFcmp[Index].C1, Label); | 2598 _br(Traits::TableFcmp[Index].C1, Label); |
| 2608 if (HasC2) { | 2599 if (HasC2) { |
| 2609 _br(Traits::TableFcmp[Index].C2, Label); | 2600 _br(Traits::TableFcmp[Index].C2, Label); |
| 2610 } | 2601 } |
| 2611 static constexpr bool IsRedefinition = true; | 2602 static constexpr bool IsRedefinition = true; |
| 2612 lowerMove(SelectDest, SrcT, IsRedefinition); | 2603 lowerMove(SelectDest, SrcT, IsRedefinition); |
| 2613 Context.insert(Label); | 2604 Context.insert(Label); |
| 2614 } | 2605 } |
| 2615 return; | 2606 return; |
| 2616 } | 2607 } |
| 2617 llvm::report_fatal_error("Unexpected consumer type"); | 2608 llvm::report_fatal_error("Unexpected consumer type"); |
| 2618 } | 2609 } |
| 2619 | 2610 |
| 2620 template <class Machine> | 2611 template <typename TraitsType> |
| 2621 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) { | 2612 void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) { |
| 2622 Operand *Src0 = Fcmp->getSrc(0); | 2613 Operand *Src0 = Fcmp->getSrc(0); |
| 2623 Operand *Src1 = Fcmp->getSrc(1); | 2614 Operand *Src1 = Fcmp->getSrc(1); |
| 2624 Variable *Dest = Fcmp->getDest(); | 2615 Variable *Dest = Fcmp->getDest(); |
| 2625 | 2616 |
| 2626 if (!isVectorType(Dest->getType())) | 2617 if (!isVectorType(Dest->getType())) |
| 2627 llvm::report_fatal_error("Expected vector compare"); | 2618 llvm::report_fatal_error("Expected vector compare"); |
| 2628 | 2619 |
| 2629 InstFcmp::FCond Condition = Fcmp->getCondition(); | 2620 InstFcmp::FCond Condition = Fcmp->getCondition(); |
| 2630 size_t Index = static_cast<size_t>(Condition); | 2621 size_t Index = static_cast<size_t>(Condition); |
| 2631 assert(Index < Traits::TableFcmpSize); | 2622 assert(Index < Traits::TableFcmpSize); |
| 2632 | 2623 |
| 2633 if (Traits::TableFcmp[Index].SwapVectorOperands) | 2624 if (Traits::TableFcmp[Index].SwapVectorOperands) |
| 2634 std::swap(Src0, Src1); | 2625 std::swap(Src0, Src1); |
| 2635 | 2626 |
| 2636 Variable *T = nullptr; | 2627 Variable *T = nullptr; |
| 2637 | 2628 |
| 2638 if (Condition == InstFcmp::True) { | 2629 if (Condition == InstFcmp::True) { |
| 2639 // makeVectorOfOnes() requires an integer vector type. | 2630 // makeVectorOfOnes() requires an integer vector type. |
| 2640 T = makeVectorOfMinusOnes(IceType_v4i32); | 2631 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 2641 } else if (Condition == InstFcmp::False) { | 2632 } else if (Condition == InstFcmp::False) { |
| 2642 T = makeVectorOfZeros(Dest->getType()); | 2633 T = makeVectorOfZeros(Dest->getType()); |
| 2643 } else { | 2634 } else { |
| 2644 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2635 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2645 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2636 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2646 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2637 if (llvm::isa<X86OperandMem>(Src1RM)) |
| 2647 Src1RM = legalizeToReg(Src1RM); | 2638 Src1RM = legalizeToReg(Src1RM); |
| 2648 | 2639 |
| 2649 switch (Condition) { | 2640 switch (Condition) { |
| 2650 default: { | 2641 default: { |
| 2651 typename Traits::Cond::CmppsCond Predicate = | 2642 CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; |
| 2652 Traits::TableFcmp[Index].Predicate; | |
| 2653 assert(Predicate != Traits::Cond::Cmpps_Invalid); | 2643 assert(Predicate != Traits::Cond::Cmpps_Invalid); |
| 2654 T = makeReg(Src0RM->getType()); | 2644 T = makeReg(Src0RM->getType()); |
| 2655 _movp(T, Src0RM); | 2645 _movp(T, Src0RM); |
| 2656 _cmpps(T, Src1RM, Predicate); | 2646 _cmpps(T, Src1RM, Predicate); |
| 2657 } break; | 2647 } break; |
| 2658 case InstFcmp::One: { | 2648 case InstFcmp::One: { |
| 2659 // Check both unequal and ordered. | 2649 // Check both unequal and ordered. |
| 2660 T = makeReg(Src0RM->getType()); | 2650 T = makeReg(Src0RM->getType()); |
| 2661 Variable *T2 = makeReg(Src0RM->getType()); | 2651 Variable *T2 = makeReg(Src0RM->getType()); |
| 2662 _movp(T, Src0RM); | 2652 _movp(T, Src0RM); |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 2684 } | 2674 } |
| 2685 | 2675 |
| 2686 inline bool isZero(const Operand *Opnd) { | 2676 inline bool isZero(const Operand *Opnd) { |
| 2687 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) | 2677 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
| 2688 return C64->getValue() == 0; | 2678 return C64->getValue() == 0; |
| 2689 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 2679 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 2690 return C32->getValue() == 0; | 2680 return C32->getValue() == 0; |
| 2691 return false; | 2681 return false; |
| 2692 } | 2682 } |
| 2693 | 2683 |
| 2694 template <class Machine> | 2684 template <typename TraitsType> |
| 2695 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp, | 2685 void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp, |
| 2696 const Inst *Consumer) { | 2686 const Inst *Consumer) { |
| 2697 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2687 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2698 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2688 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2699 Variable *Dest = Icmp->getDest(); | 2689 Variable *Dest = Icmp->getDest(); |
| 2700 | 2690 |
| 2701 if (isVectorType(Dest->getType())) | 2691 if (isVectorType(Dest->getType())) |
| 2702 llvm::report_fatal_error("Vector compare/branch cannot be folded"); | 2692 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| 2703 | 2693 |
| 2704 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { | 2694 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 2705 lowerIcmp64(Icmp, Consumer); | 2695 lowerIcmp64(Icmp, Consumer); |
| 2706 return; | 2696 return; |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 2718 movOrConsumer(false, Dest, Consumer); | 2708 movOrConsumer(false, Dest, Consumer); |
| 2719 return; | 2709 return; |
| 2720 } | 2710 } |
| 2721 } | 2711 } |
| 2722 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2712 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2723 _cmp(Src0RM, Src1); | 2713 _cmp(Src0RM, Src1); |
| 2724 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, | 2714 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, |
| 2725 Consumer); | 2715 Consumer); |
| 2726 } | 2716 } |
| 2727 | 2717 |
| 2728 template <class Machine> | 2718 template <typename TraitsType> |
| 2729 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) { | 2719 void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) { |
| 2730 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2720 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2731 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2721 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2732 Variable *Dest = Icmp->getDest(); | 2722 Variable *Dest = Icmp->getDest(); |
| 2733 | 2723 |
| 2734 if (!isVectorType(Dest->getType())) | 2724 if (!isVectorType(Dest->getType())) |
| 2735 llvm::report_fatal_error("Expected a vector compare"); | 2725 llvm::report_fatal_error("Expected a vector compare"); |
| 2736 | 2726 |
| 2737 Type Ty = Src0->getType(); | 2727 Type Ty = Src0->getType(); |
| 2738 // Promote i1 vectors to 128 bit integer vector types. | 2728 // Promote i1 vectors to 128 bit integer vector types. |
| 2739 if (typeElementType(Ty) == IceType_i1) { | 2729 if (typeElementType(Ty) == IceType_i1) { |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2781 Src0RM = T0; | 2771 Src0RM = T0; |
| 2782 Src1RM = T1; | 2772 Src1RM = T1; |
| 2783 } | 2773 } |
| 2784 | 2774 |
| 2785 Variable *T = makeReg(Ty); | 2775 Variable *T = makeReg(Ty); |
| 2786 switch (Condition) { | 2776 switch (Condition) { |
| 2787 default: | 2777 default: |
| 2788 llvm_unreachable("unexpected condition"); | 2778 llvm_unreachable("unexpected condition"); |
| 2789 break; | 2779 break; |
| 2790 case InstIcmp::Eq: { | 2780 case InstIcmp::Eq: { |
| 2791 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2781 if (llvm::isa<X86OperandMem>(Src1RM)) |
| 2792 Src1RM = legalizeToReg(Src1RM); | 2782 Src1RM = legalizeToReg(Src1RM); |
| 2793 _movp(T, Src0RM); | 2783 _movp(T, Src0RM); |
| 2794 _pcmpeq(T, Src1RM); | 2784 _pcmpeq(T, Src1RM); |
| 2795 } break; | 2785 } break; |
| 2796 case InstIcmp::Ne: { | 2786 case InstIcmp::Ne: { |
| 2797 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2787 if (llvm::isa<X86OperandMem>(Src1RM)) |
| 2798 Src1RM = legalizeToReg(Src1RM); | 2788 Src1RM = legalizeToReg(Src1RM); |
| 2799 _movp(T, Src0RM); | 2789 _movp(T, Src0RM); |
| 2800 _pcmpeq(T, Src1RM); | 2790 _pcmpeq(T, Src1RM); |
| 2801 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2791 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2802 _pxor(T, MinusOne); | 2792 _pxor(T, MinusOne); |
| 2803 } break; | 2793 } break; |
| 2804 case InstIcmp::Ugt: | 2794 case InstIcmp::Ugt: |
| 2805 case InstIcmp::Sgt: { | 2795 case InstIcmp::Sgt: { |
| 2806 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2796 if (llvm::isa<X86OperandMem>(Src1RM)) |
| 2807 Src1RM = legalizeToReg(Src1RM); | 2797 Src1RM = legalizeToReg(Src1RM); |
| 2808 _movp(T, Src0RM); | 2798 _movp(T, Src0RM); |
| 2809 _pcmpgt(T, Src1RM); | 2799 _pcmpgt(T, Src1RM); |
| 2810 } break; | 2800 } break; |
| 2811 case InstIcmp::Uge: | 2801 case InstIcmp::Uge: |
| 2812 case InstIcmp::Sge: { | 2802 case InstIcmp::Sge: { |
| 2813 // !(Src1RM > Src0RM) | 2803 // !(Src1RM > Src0RM) |
| 2814 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2804 if (llvm::isa<X86OperandMem>(Src0RM)) |
| 2815 Src0RM = legalizeToReg(Src0RM); | 2805 Src0RM = legalizeToReg(Src0RM); |
| 2816 _movp(T, Src1RM); | 2806 _movp(T, Src1RM); |
| 2817 _pcmpgt(T, Src0RM); | 2807 _pcmpgt(T, Src0RM); |
| 2818 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2808 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2819 _pxor(T, MinusOne); | 2809 _pxor(T, MinusOne); |
| 2820 } break; | 2810 } break; |
| 2821 case InstIcmp::Ult: | 2811 case InstIcmp::Ult: |
| 2822 case InstIcmp::Slt: { | 2812 case InstIcmp::Slt: { |
| 2823 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2813 if (llvm::isa<X86OperandMem>(Src0RM)) |
| 2824 Src0RM = legalizeToReg(Src0RM); | 2814 Src0RM = legalizeToReg(Src0RM); |
| 2825 _movp(T, Src1RM); | 2815 _movp(T, Src1RM); |
| 2826 _pcmpgt(T, Src0RM); | 2816 _pcmpgt(T, Src0RM); |
| 2827 } break; | 2817 } break; |
| 2828 case InstIcmp::Ule: | 2818 case InstIcmp::Ule: |
| 2829 case InstIcmp::Sle: { | 2819 case InstIcmp::Sle: { |
| 2830 // !(Src0RM > Src1RM) | 2820 // !(Src0RM > Src1RM) |
| 2831 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2821 if (llvm::isa<X86OperandMem>(Src1RM)) |
| 2832 Src1RM = legalizeToReg(Src1RM); | 2822 Src1RM = legalizeToReg(Src1RM); |
| 2833 _movp(T, Src0RM); | 2823 _movp(T, Src0RM); |
| 2834 _pcmpgt(T, Src1RM); | 2824 _pcmpgt(T, Src1RM); |
| 2835 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2825 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2836 _pxor(T, MinusOne); | 2826 _pxor(T, MinusOne); |
| 2837 } break; | 2827 } break; |
| 2838 } | 2828 } |
| 2839 | 2829 |
| 2840 _movp(Dest, T); | 2830 _movp(Dest, T); |
| 2841 eliminateNextVectorSextInstruction(Dest); | 2831 eliminateNextVectorSextInstruction(Dest); |
| 2842 } | 2832 } |
| 2843 | 2833 |
| 2844 template <typename Machine> | 2834 template <typename TraitsType> |
| 2845 template <typename T> | 2835 template <typename T> |
| 2846 typename std::enable_if<!T::Is64Bit, void>::type | 2836 typename std::enable_if<!T::Is64Bit, void>::type |
| 2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, | 2837 TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp, |
| 2848 const Inst *Consumer) { | 2838 const Inst *Consumer) { |
| 2849 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2839 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| 2850 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2840 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2851 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2841 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2852 Variable *Dest = Icmp->getDest(); | 2842 Variable *Dest = Icmp->getDest(); |
| 2853 InstIcmp::ICond Condition = Icmp->getCondition(); | 2843 InstIcmp::ICond Condition = Icmp->getCondition(); |
| 2854 size_t Index = static_cast<size_t>(Condition); | 2844 size_t Index = static_cast<size_t>(Condition); |
| 2855 assert(Index < Traits::TableIcmp64Size); | 2845 assert(Index < Traits::TableIcmp64Size); |
| 2856 Operand *Src0LoRM = nullptr; | 2846 Operand *Src0LoRM = nullptr; |
| 2857 Operand *Src0HiRM = nullptr; | 2847 Operand *Src0HiRM = nullptr; |
| 2858 // Legalize the portions of Src0 that are going to be needed. | 2848 // Legalize the portions of Src0 that are going to be needed. |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2933 case InstIcmp::Sle: | 2923 case InstIcmp::Sle: |
| 2934 break; | 2924 break; |
| 2935 } | 2925 } |
| 2936 } | 2926 } |
| 2937 // Handle general compares. | 2927 // Handle general compares. |
| 2938 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 2928 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 2939 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 2929 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 2940 if (Consumer == nullptr) { | 2930 if (Consumer == nullptr) { |
| 2941 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); | 2931 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); |
| 2942 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); | 2932 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); |
| 2943 typename Traits::Insts::Label *LabelFalse = | 2933 InstX86Label *LabelFalse = InstX86Label::create(Func, this); |
| 2944 Traits::Insts::Label::create(Func, this); | 2934 InstX86Label *LabelTrue = InstX86Label::create(Func, this); |
| 2945 typename Traits::Insts::Label *LabelTrue = | |
| 2946 Traits::Insts::Label::create(Func, this); | |
| 2947 _mov(Dest, One); | 2935 _mov(Dest, One); |
| 2948 _cmp(Src0HiRM, Src1HiRI); | 2936 _cmp(Src0HiRM, Src1HiRI); |
| 2949 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2937 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2950 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 2938 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 2951 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2939 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2952 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 2940 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 2953 _cmp(Src0LoRM, Src1LoRI); | 2941 _cmp(Src0LoRM, Src1LoRI); |
| 2954 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 2942 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 2955 Context.insert(LabelFalse); | 2943 Context.insert(LabelFalse); |
| 2956 _redefined(_mov(Dest, Zero)); | 2944 _redefined(_mov(Dest, Zero)); |
| 2957 Context.insert(LabelTrue); | 2945 Context.insert(LabelTrue); |
| 2958 return; | 2946 return; |
| 2959 } | 2947 } |
| 2960 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 2948 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 2961 _cmp(Src0HiRM, Src1HiRI); | 2949 _cmp(Src0HiRM, Src1HiRI); |
| 2962 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2950 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2963 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); | 2951 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); |
| 2964 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2952 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2965 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); | 2953 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); |
| 2966 _cmp(Src0LoRM, Src1LoRI); | 2954 _cmp(Src0LoRM, Src1LoRI); |
| 2967 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), | 2955 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), |
| 2968 Br->getTargetFalse()); | 2956 Br->getTargetFalse()); |
| 2969 return; | 2957 return; |
| 2970 } | 2958 } |
| 2971 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2959 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2972 Operand *SrcT = Select->getTrueOperand(); | 2960 Operand *SrcT = Select->getTrueOperand(); |
| 2973 Operand *SrcF = Select->getFalseOperand(); | 2961 Operand *SrcF = Select->getFalseOperand(); |
| 2974 Variable *SelectDest = Select->getDest(); | 2962 Variable *SelectDest = Select->getDest(); |
| 2975 typename Traits::Insts::Label *LabelFalse = | 2963 InstX86Label *LabelFalse = InstX86Label::create(Func, this); |
| 2976 Traits::Insts::Label::create(Func, this); | 2964 InstX86Label *LabelTrue = InstX86Label::create(Func, this); |
| 2977 typename Traits::Insts::Label *LabelTrue = | |
| 2978 Traits::Insts::Label::create(Func, this); | |
| 2979 lowerMove(SelectDest, SrcT, false); | 2965 lowerMove(SelectDest, SrcT, false); |
| 2980 _cmp(Src0HiRM, Src1HiRI); | 2966 _cmp(Src0HiRM, Src1HiRI); |
| 2981 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2967 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2982 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 2968 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 2983 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2969 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2984 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 2970 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 2985 _cmp(Src0LoRM, Src1LoRI); | 2971 _cmp(Src0LoRM, Src1LoRI); |
| 2986 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 2972 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 2987 Context.insert(LabelFalse); | 2973 Context.insert(LabelFalse); |
| 2988 static constexpr bool IsRedefinition = true; | 2974 static constexpr bool IsRedefinition = true; |
| 2989 lowerMove(SelectDest, SrcF, IsRedefinition); | 2975 lowerMove(SelectDest, SrcF, IsRedefinition); |
| 2990 Context.insert(LabelTrue); | 2976 Context.insert(LabelTrue); |
| 2991 return; | 2977 return; |
| 2992 } | 2978 } |
| 2993 llvm::report_fatal_error("Unexpected consumer type"); | 2979 llvm::report_fatal_error("Unexpected consumer type"); |
| 2994 } | 2980 } |
| 2995 | 2981 |
| 2996 template <class Machine> | 2982 template <typename TraitsType> |
| 2997 void TargetX86Base<Machine>::setccOrConsumer( | 2983 void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition, |
| 2998 typename Traits::Cond::BrCond Condition, Variable *Dest, | 2984 Variable *Dest, |
| 2999 const Inst *Consumer) { | 2985 const Inst *Consumer) { |
| 3000 if (Consumer == nullptr) { | 2986 if (Consumer == nullptr) { |
| 3001 _setcc(Dest, Condition); | 2987 _setcc(Dest, Condition); |
| 3002 return; | 2988 return; |
| 3003 } | 2989 } |
| 3004 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 2990 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 3005 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); | 2991 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3006 return; | 2992 return; |
| 3007 } | 2993 } |
| 3008 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2994 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 3009 Operand *SrcT = Select->getTrueOperand(); | 2995 Operand *SrcT = Select->getTrueOperand(); |
| 3010 Operand *SrcF = Select->getFalseOperand(); | 2996 Operand *SrcF = Select->getFalseOperand(); |
| 3011 Variable *SelectDest = Select->getDest(); | 2997 Variable *SelectDest = Select->getDest(); |
| 3012 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); | 2998 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); |
| 3013 return; | 2999 return; |
| 3014 } | 3000 } |
| 3015 llvm::report_fatal_error("Unexpected consumer type"); | 3001 llvm::report_fatal_error("Unexpected consumer type"); |
| 3016 } | 3002 } |
| 3017 | 3003 |
| 3018 template <class Machine> | 3004 template <typename TraitsType> |
| 3019 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest, | 3005 void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest, |
| 3020 const Inst *Consumer) { | 3006 const Inst *Consumer) { |
| 3021 if (Consumer == nullptr) { | 3007 if (Consumer == nullptr) { |
| 3022 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3008 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| 3023 return; | 3009 return; |
| 3024 } | 3010 } |
| 3025 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3011 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 3026 // TODO(sehr,stichnot): This could be done with a single unconditional | 3012 // TODO(sehr,stichnot): This could be done with a single unconditional |
| 3027 // branch instruction, but subzero doesn't know how to handle the resulting | 3013 // branch instruction, but subzero doesn't know how to handle the resulting |
| 3028 // control flow graph changes now. Make it do so to eliminate mov and cmp. | 3014 // control flow graph changes now. Make it do so to eliminate mov and cmp. |
| 3029 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3015 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| 3030 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); | 3016 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); |
| 3031 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3017 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3032 return; | 3018 return; |
| 3033 } | 3019 } |
| 3034 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 3020 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 3035 Operand *Src = nullptr; | 3021 Operand *Src = nullptr; |
| 3036 if (IcmpResult) { | 3022 if (IcmpResult) { |
| 3037 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); | 3023 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); |
| 3038 } else { | 3024 } else { |
| 3039 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); | 3025 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); |
| 3040 } | 3026 } |
| 3041 Variable *SelectDest = Select->getDest(); | 3027 Variable *SelectDest = Select->getDest(); |
| 3042 lowerMove(SelectDest, Src, false); | 3028 lowerMove(SelectDest, Src, false); |
| 3043 return; | 3029 return; |
| 3044 } | 3030 } |
| 3045 llvm::report_fatal_error("Unexpected consumer type"); | 3031 llvm::report_fatal_error("Unexpected consumer type"); |
| 3046 } | 3032 } |
| 3047 | 3033 |
| 3048 template <class Machine> | 3034 template <typename TraitsType> |
| 3049 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith, | 3035 void TargetX86Base<TraitsType>::lowerArithAndConsumer( |
| 3050 const Inst *Consumer) { | 3036 const InstArithmetic *Arith, const Inst *Consumer) { |
| 3051 Variable *T = nullptr; | 3037 Variable *T = nullptr; |
| 3052 Operand *Src0 = legalize(Arith->getSrc(0)); | 3038 Operand *Src0 = legalize(Arith->getSrc(0)); |
| 3053 Operand *Src1 = legalize(Arith->getSrc(1)); | 3039 Operand *Src1 = legalize(Arith->getSrc(1)); |
| 3054 Variable *Dest = Arith->getDest(); | 3040 Variable *Dest = Arith->getDest(); |
| 3055 switch (Arith->getOp()) { | 3041 switch (Arith->getOp()) { |
| 3056 default: | 3042 default: |
| 3057 llvm_unreachable("arithmetic operator not AND or OR"); | 3043 llvm_unreachable("arithmetic operator not AND or OR"); |
| 3058 break; | 3044 break; |
| 3059 case InstArithmetic::And: | 3045 case InstArithmetic::And: |
| 3060 _mov(T, Src0); | 3046 _mov(T, Src0); |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 3077 } | 3063 } |
| 3078 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3064 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 3079 Context.insert<InstFakeUse>(T); | 3065 Context.insert<InstFakeUse>(T); |
| 3080 Context.insert<InstFakeDef>(Dest); | 3066 Context.insert<InstFakeDef>(Dest); |
| 3081 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3067 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3082 return; | 3068 return; |
| 3083 } | 3069 } |
| 3084 llvm::report_fatal_error("Unexpected consumer type"); | 3070 llvm::report_fatal_error("Unexpected consumer type"); |
| 3085 } | 3071 } |
| 3086 | 3072 |
| 3087 template <class Machine> | 3073 template <typename TraitsType> |
| 3088 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3074 void TargetX86Base<TraitsType>::lowerInsertElement( |
| 3075 const InstInsertElement *Inst) { | |
| 3089 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3076 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 3090 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3077 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 3091 ConstantInteger32 *ElementIndex = | 3078 ConstantInteger32 *ElementIndex = |
| 3092 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3079 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 3093 // Only constant indices are allowed in PNaCl IR. | 3080 // Only constant indices are allowed in PNaCl IR. |
| 3094 assert(ElementIndex); | 3081 assert(ElementIndex); |
| 3095 unsigned Index = ElementIndex->getValue(); | 3082 unsigned Index = ElementIndex->getValue(); |
| 3096 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 3083 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
| 3097 | 3084 |
| 3098 Type Ty = SourceVectNotLegalized->getType(); | 3085 Type Ty = SourceVectNotLegalized->getType(); |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 3116 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3103 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 3117 Operand *SourceVectRM = | 3104 Operand *SourceVectRM = |
| 3118 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3105 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3119 Variable *T = makeReg(Ty); | 3106 Variable *T = makeReg(Ty); |
| 3120 _movp(T, SourceVectRM); | 3107 _movp(T, SourceVectRM); |
| 3121 if (Ty == IceType_v4f32) { | 3108 if (Ty == IceType_v4f32) { |
| 3122 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3109 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 3123 } else { | 3110 } else { |
| 3124 // For the pinsrb and pinsrw instructions, when the source operand is a | 3111 // For the pinsrb and pinsrw instructions, when the source operand is a |
| 3125 // register, it must be a full r32 register like eax, and not ax/al/ah. | 3112 // register, it must be a full r32 register like eax, and not ax/al/ah. |
| 3126 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use | 3113 // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for |
| 3114 // the use | |
| 3127 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() | 3115 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() |
| 3128 // validates that the original and base register encodings are the same. | 3116 // validates that the original and base register encodings are the same. |
| 3129 if (ElementRM->getType() == IceType_i8 && | 3117 if (ElementRM->getType() == IceType_i8 && |
| 3130 llvm::isa<Variable>(ElementRM)) { | 3118 llvm::isa<Variable>(ElementRM)) { |
| 3131 // Don't use ah/bh/ch/dh for pinsrb. | 3119 // Don't use ah/bh/ch/dh for pinsrb. |
| 3132 ElementRM = copyToReg8(ElementRM); | 3120 ElementRM = copyToReg8(ElementRM); |
| 3133 } | 3121 } |
| 3134 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3122 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
| 3135 } | 3123 } |
| 3136 _movp(Inst->getDest(), T); | 3124 _movp(Inst->getDest(), T); |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3201 // Spill the value to a stack slot and perform the insertion in memory. | 3189 // Spill the value to a stack slot and perform the insertion in memory. |
| 3202 // | 3190 // |
| 3203 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 3191 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
| 3204 // for legalizing to mem is implemented. | 3192 // for legalizing to mem is implemented. |
| 3205 Variable *Slot = Func->makeVariable(Ty); | 3193 Variable *Slot = Func->makeVariable(Ty); |
| 3206 Slot->setMustNotHaveReg(); | 3194 Slot->setMustNotHaveReg(); |
| 3207 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 3195 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
| 3208 | 3196 |
| 3209 // Compute the location of the position to insert in memory. | 3197 // Compute the location of the position to insert in memory. |
| 3210 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 3198 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
| 3211 typename Traits::X86OperandMem *Loc = | 3199 X86OperandMem *Loc = |
| 3212 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3200 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 3213 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); | 3201 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); |
| 3214 | 3202 |
| 3215 Variable *T = makeReg(Ty); | 3203 Variable *T = makeReg(Ty); |
| 3216 _movp(T, Slot); | 3204 _movp(T, Slot); |
| 3217 _movp(Inst->getDest(), T); | 3205 _movp(Inst->getDest(), T); |
| 3218 } | 3206 } |
| 3219 } | 3207 } |
| 3220 | 3208 |
| 3221 template <class Machine> | 3209 template <typename TraitsType> |
| 3222 void TargetX86Base<Machine>::lowerIntrinsicCall( | 3210 void TargetX86Base<TraitsType>::lowerIntrinsicCall( |
| 3223 const InstIntrinsicCall *Instr) { | 3211 const InstIntrinsicCall *Instr) { |
| 3224 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3212 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
| 3225 case Intrinsics::AtomicCmpxchg: { | 3213 case Intrinsics::AtomicCmpxchg: { |
| 3226 if (!Intrinsics::isMemoryOrderValid( | 3214 if (!Intrinsics::isMemoryOrderValid( |
| 3227 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3215 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 3228 getConstantMemoryOrder(Instr->getArg(4)))) { | 3216 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 3229 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3217 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 3230 return; | 3218 return; |
| 3231 } | 3219 } |
| 3232 Variable *DestPrev = Instr->getDest(); | 3220 Variable *DestPrev = Instr->getDest(); |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3292 } | 3280 } |
| 3293 Variable *Dest = Instr->getDest(); | 3281 Variable *Dest = Instr->getDest(); |
| 3294 if (!Traits::Is64Bit) { | 3282 if (!Traits::Is64Bit) { |
| 3295 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { | 3283 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { |
| 3296 // Follow what GCC does and use a movq instead of what lowerLoad() | 3284 // Follow what GCC does and use a movq instead of what lowerLoad() |
| 3297 // normally does (split the load into two). Thus, this skips | 3285 // normally does (split the load into two). Thus, this skips |
| 3298 // load/arithmetic op folding. Load/arithmetic folding can't happen | 3286 // load/arithmetic op folding. Load/arithmetic folding can't happen |
| 3299 // anyway, since this is x86-32 and integer arithmetic only happens on | 3287 // anyway, since this is x86-32 and integer arithmetic only happens on |
| 3300 // 32-bit quantities. | 3288 // 32-bit quantities. |
| 3301 Variable *T = makeReg(IceType_f64); | 3289 Variable *T = makeReg(IceType_f64); |
| 3302 typename Traits::X86OperandMem *Addr = | 3290 X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 3303 formMemoryOperand(Instr->getArg(0), IceType_f64); | |
| 3304 _movq(T, Addr); | 3291 _movq(T, Addr); |
| 3305 // Then cast the bits back out of the XMM register to the i64 Dest. | 3292 // Then cast the bits back out of the XMM register to the i64 Dest. |
| 3306 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3293 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
| 3307 lowerCast(Cast); | 3294 lowerCast(Cast); |
| 3308 // Make sure that the atomic load isn't elided when unused. | 3295 // Make sure that the atomic load isn't elided when unused. |
| 3309 Context.insert<InstFakeUse>(Dest64On32->getLo()); | 3296 Context.insert<InstFakeUse>(Dest64On32->getLo()); |
| 3310 Context.insert<InstFakeUse>(Dest64On32->getHi()); | 3297 Context.insert<InstFakeUse>(Dest64On32->getHi()); |
| 3311 return; | 3298 return; |
| 3312 } | 3299 } |
| 3313 } | 3300 } |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 3343 Operand *Value = Instr->getArg(0); | 3330 Operand *Value = Instr->getArg(0); |
| 3344 Operand *Ptr = Instr->getArg(1); | 3331 Operand *Ptr = Instr->getArg(1); |
| 3345 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { | 3332 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
| 3346 // Use a movq instead of what lowerStore() normally does (split the store | 3333 // Use a movq instead of what lowerStore() normally does (split the store |
| 3347 // into two), following what GCC does. Cast the bits from int -> to an | 3334 // into two), following what GCC does. Cast the bits from int -> to an |
| 3348 // xmm register first. | 3335 // xmm register first. |
| 3349 Variable *T = makeReg(IceType_f64); | 3336 Variable *T = makeReg(IceType_f64); |
| 3350 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | 3337 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
| 3351 lowerCast(Cast); | 3338 lowerCast(Cast); |
| 3352 // Then store XMM w/ a movq. | 3339 // Then store XMM w/ a movq. |
| 3353 typename Traits::X86OperandMem *Addr = | 3340 X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64); |
| 3354 formMemoryOperand(Ptr, IceType_f64); | |
| 3355 _storeq(T, Addr); | 3341 _storeq(T, Addr); |
| 3356 _mfence(); | 3342 _mfence(); |
| 3357 return; | 3343 return; |
| 3358 } | 3344 } |
| 3359 auto *Store = InstStore::create(Func, Value, Ptr); | 3345 auto *Store = InstStore::create(Func, Value, Ptr); |
| 3360 lowerStore(Store); | 3346 lowerStore(Store); |
| 3361 _mfence(); | 3347 _mfence(); |
| 3362 return; | 3348 return; |
| 3363 } | 3349 } |
| 3364 case Intrinsics::Bswap: { | 3350 case Intrinsics::Bswap: { |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3488 return; | 3474 return; |
| 3489 } | 3475 } |
| 3490 case Intrinsics::Fabs: { | 3476 case Intrinsics::Fabs: { |
| 3491 Operand *Src = legalize(Instr->getArg(0)); | 3477 Operand *Src = legalize(Instr->getArg(0)); |
| 3492 Type Ty = Src->getType(); | 3478 Type Ty = Src->getType(); |
| 3493 Variable *Dest = Instr->getDest(); | 3479 Variable *Dest = Instr->getDest(); |
| 3494 Variable *T = makeVectorOfFabsMask(Ty); | 3480 Variable *T = makeVectorOfFabsMask(Ty); |
| 3495 // The pand instruction operates on an m128 memory operand, so if Src is an | 3481 // The pand instruction operates on an m128 memory operand, so if Src is an |
| 3496 // f32 or f64, we need to make sure it's in a register. | 3482 // f32 or f64, we need to make sure it's in a register. |
| 3497 if (isVectorType(Ty)) { | 3483 if (isVectorType(Ty)) { |
| 3498 if (llvm::isa<typename Traits::X86OperandMem>(Src)) | 3484 if (llvm::isa<X86OperandMem>(Src)) |
| 3499 Src = legalizeToReg(Src); | 3485 Src = legalizeToReg(Src); |
| 3500 } else { | 3486 } else { |
| 3501 Src = legalizeToReg(Src); | 3487 Src = legalizeToReg(Src); |
| 3502 } | 3488 } |
| 3503 _pand(T, Src); | 3489 _pand(T, Src); |
| 3504 if (isVectorType(Ty)) | 3490 if (isVectorType(Ty)) |
| 3505 _movp(Dest, T); | 3491 _movp(Dest, T); |
| 3506 else | 3492 else |
| 3507 _mov(Dest, T); | 3493 _mov(Dest, T); |
| 3508 return; | 3494 return; |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 3521 case Intrinsics::Memmove: { | 3507 case Intrinsics::Memmove: { |
| 3522 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); | 3508 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| 3523 return; | 3509 return; |
| 3524 } | 3510 } |
| 3525 case Intrinsics::Memset: { | 3511 case Intrinsics::Memset: { |
| 3526 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); | 3512 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| 3527 return; | 3513 return; |
| 3528 } | 3514 } |
| 3529 case Intrinsics::NaClReadTP: { | 3515 case Intrinsics::NaClReadTP: { |
| 3530 if (Ctx->getFlags().getUseSandboxing()) { | 3516 if (Ctx->getFlags().getUseSandboxing()) { |
| 3531 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); | 3517 Operand *Src = |
| 3518 dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand); | |
| 3532 Variable *Dest = Instr->getDest(); | 3519 Variable *Dest = Instr->getDest(); |
| 3533 Variable *T = nullptr; | 3520 Variable *T = nullptr; |
| 3534 _mov(T, Src); | 3521 _mov(T, Src); |
| 3535 _mov(Dest, T); | 3522 _mov(Dest, T); |
| 3536 } else { | 3523 } else { |
| 3537 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); | 3524 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); |
| 3538 lowerCall(Call); | 3525 lowerCall(Call); |
| 3539 } | 3526 } |
| 3540 return; | 3527 return; |
| 3541 } | 3528 } |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 3571 case Intrinsics::Trap: | 3558 case Intrinsics::Trap: |
| 3572 _ud2(); | 3559 _ud2(); |
| 3573 return; | 3560 return; |
| 3574 case Intrinsics::UnknownIntrinsic: | 3561 case Intrinsics::UnknownIntrinsic: |
| 3575 Func->setError("Should not be lowering UnknownIntrinsic"); | 3562 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3576 return; | 3563 return; |
| 3577 } | 3564 } |
| 3578 return; | 3565 return; |
| 3579 } | 3566 } |
| 3580 | 3567 |
| 3581 template <class Machine> | 3568 template <typename TraitsType> |
| 3582 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3569 void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3583 Operand *Ptr, Operand *Expected, | 3570 Operand *Ptr, |
| 3584 Operand *Desired) { | 3571 Operand *Expected, |
| 3572 Operand *Desired) { | |
| 3585 Type Ty = Expected->getType(); | 3573 Type Ty = Expected->getType(); |
| 3586 if (!Traits::Is64Bit && Ty == IceType_i64) { | 3574 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 3587 // Reserve the pre-colored registers first, before adding any more | 3575 // Reserve the pre-colored registers first, before adding any more |
| 3588 // infinite-weight variables from formMemoryOperand's legalization. | 3576 // infinite-weight variables from formMemoryOperand's legalization. |
| 3589 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3577 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3590 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3578 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3591 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3579 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3592 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3580 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3593 _mov(T_eax, loOperand(Expected)); | 3581 _mov(T_eax, loOperand(Expected)); |
| 3594 _mov(T_edx, hiOperand(Expected)); | 3582 _mov(T_edx, hiOperand(Expected)); |
| 3595 _mov(T_ebx, loOperand(Desired)); | 3583 _mov(T_ebx, loOperand(Desired)); |
| 3596 _mov(T_ecx, hiOperand(Desired)); | 3584 _mov(T_ecx, hiOperand(Desired)); |
| 3597 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3585 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3598 constexpr bool Locked = true; | 3586 constexpr bool Locked = true; |
| 3599 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3587 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3600 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3588 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3601 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3589 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3602 _mov(DestLo, T_eax); | 3590 _mov(DestLo, T_eax); |
| 3603 _mov(DestHi, T_edx); | 3591 _mov(DestHi, T_edx); |
| 3604 return; | 3592 return; |
| 3605 } | 3593 } |
| 3606 int32_t Eax; | 3594 int32_t Eax; |
| 3607 switch (Ty) { | 3595 switch (Ty) { |
| 3608 default: | 3596 default: |
| 3609 llvm::report_fatal_error("Bad type for cmpxchg"); | 3597 llvm::report_fatal_error("Bad type for cmpxchg"); |
| 3610 case IceType_i64: | 3598 case IceType_i64: |
| 3611 Eax = Traits::getRaxOrDie(); | 3599 Eax = Traits::getRaxOrDie(); |
| 3612 break; | 3600 break; |
| 3613 case IceType_i32: | 3601 case IceType_i32: |
| 3614 Eax = Traits::RegisterSet::Reg_eax; | 3602 Eax = Traits::RegisterSet::Reg_eax; |
| 3615 break; | 3603 break; |
| 3616 case IceType_i16: | 3604 case IceType_i16: |
| 3617 Eax = Traits::RegisterSet::Reg_ax; | 3605 Eax = Traits::RegisterSet::Reg_ax; |
| 3618 break; | 3606 break; |
| 3619 case IceType_i8: | 3607 case IceType_i8: |
| 3620 Eax = Traits::RegisterSet::Reg_al; | 3608 Eax = Traits::RegisterSet::Reg_al; |
| 3621 break; | 3609 break; |
| 3622 } | 3610 } |
| 3623 Variable *T_eax = makeReg(Ty, Eax); | 3611 Variable *T_eax = makeReg(Ty, Eax); |
| 3624 _mov(T_eax, Expected); | 3612 _mov(T_eax, Expected); |
| 3625 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3613 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3626 Variable *DesiredReg = legalizeToReg(Desired); | 3614 Variable *DesiredReg = legalizeToReg(Desired); |
| 3627 constexpr bool Locked = true; | 3615 constexpr bool Locked = true; |
| 3628 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3616 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3629 _mov(DestPrev, T_eax); | 3617 _mov(DestPrev, T_eax); |
| 3630 } | 3618 } |
| 3631 | 3619 |
| 3632 template <class Machine> | 3620 template <typename TraitsType> |
| 3633 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3621 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| 3634 Operand *PtrToMem, | 3622 Operand *PtrToMem, |
| 3635 Operand *Expected, | 3623 Operand *Expected, |
| 3636 Operand *Desired) { | 3624 Operand *Desired) { |
| 3637 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3625 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 3638 return false; | 3626 return false; |
| 3639 // Peek ahead a few instructions and see how Dest is used. | 3627 // Peek ahead a few instructions and see how Dest is used. |
| 3640 // It's very common to have: | 3628 // It's very common to have: |
| 3641 // | 3629 // |
| 3642 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3630 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
| 3643 // [%y_phi = ...] // list of phi stores | 3631 // [%y_phi = ...] // list of phi stores |
| 3644 // %p = icmp eq i32 %x, %expected | 3632 // %p = icmp eq i32 %x, %expected |
| 3645 // br i1 %p, label %l1, label %l2 | 3633 // br i1 %p, label %l1, label %l2 |
| 3646 // | 3634 // |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3698 NextBr->setDeleted(); | 3686 NextBr->setDeleted(); |
| 3699 Context.advanceNext(); | 3687 Context.advanceNext(); |
| 3700 Context.advanceNext(); | 3688 Context.advanceNext(); |
| 3701 return true; | 3689 return true; |
| 3702 } | 3690 } |
| 3703 } | 3691 } |
| 3704 } | 3692 } |
| 3705 return false; | 3693 return false; |
| 3706 } | 3694 } |
| 3707 | 3695 |
| 3708 template <class Machine> | 3696 template <typename TraitsType> |
| 3709 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3697 void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest, |
| 3710 Operand *Ptr, Operand *Val) { | 3698 uint32_t Operation, Operand *Ptr, |
| 3699 Operand *Val) { | |
| 3711 bool NeedsCmpxchg = false; | 3700 bool NeedsCmpxchg = false; |
| 3712 LowerBinOp Op_Lo = nullptr; | 3701 LowerBinOp Op_Lo = nullptr; |
| 3713 LowerBinOp Op_Hi = nullptr; | 3702 LowerBinOp Op_Hi = nullptr; |
| 3714 switch (Operation) { | 3703 switch (Operation) { |
| 3715 default: | 3704 default: |
| 3716 Func->setError("Unknown AtomicRMW operation"); | 3705 Func->setError("Unknown AtomicRMW operation"); |
| 3717 return; | 3706 return; |
| 3718 case Intrinsics::AtomicAdd: { | 3707 case Intrinsics::AtomicAdd: { |
| 3719 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3708 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3720 // All the fall-through paths must set this to true, but use this | 3709 // All the fall-through paths must set this to true, but use this |
| 3721 // for asserting. | 3710 // for asserting. |
| 3722 NeedsCmpxchg = true; | 3711 NeedsCmpxchg = true; |
| 3723 Op_Lo = &TargetX86Base<Machine>::_add; | 3712 Op_Lo = &TargetX86Base<TraitsType>::_add; |
| 3724 Op_Hi = &TargetX86Base<Machine>::_adc; | 3713 Op_Hi = &TargetX86Base<TraitsType>::_adc; |
| 3725 break; | 3714 break; |
| 3726 } | 3715 } |
| 3727 typename Traits::X86OperandMem *Addr = | 3716 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3728 formMemoryOperand(Ptr, Dest->getType()); | |
| 3729 constexpr bool Locked = true; | 3717 constexpr bool Locked = true; |
| 3730 Variable *T = nullptr; | 3718 Variable *T = nullptr; |
| 3731 _mov(T, Val); | 3719 _mov(T, Val); |
| 3732 _xadd(Addr, T, Locked); | 3720 _xadd(Addr, T, Locked); |
| 3733 _mov(Dest, T); | 3721 _mov(Dest, T); |
| 3734 return; | 3722 return; |
| 3735 } | 3723 } |
| 3736 case Intrinsics::AtomicSub: { | 3724 case Intrinsics::AtomicSub: { |
| 3737 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3725 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3738 NeedsCmpxchg = true; | 3726 NeedsCmpxchg = true; |
| 3739 Op_Lo = &TargetX86Base<Machine>::_sub; | 3727 Op_Lo = &TargetX86Base<TraitsType>::_sub; |
| 3740 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3728 Op_Hi = &TargetX86Base<TraitsType>::_sbb; |
| 3741 break; | 3729 break; |
| 3742 } | 3730 } |
| 3743 typename Traits::X86OperandMem *Addr = | 3731 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3744 formMemoryOperand(Ptr, Dest->getType()); | |
| 3745 constexpr bool Locked = true; | 3732 constexpr bool Locked = true; |
| 3746 Variable *T = nullptr; | 3733 Variable *T = nullptr; |
| 3747 _mov(T, Val); | 3734 _mov(T, Val); |
| 3748 _neg(T); | 3735 _neg(T); |
| 3749 _xadd(Addr, T, Locked); | 3736 _xadd(Addr, T, Locked); |
| 3750 _mov(Dest, T); | 3737 _mov(Dest, T); |
| 3751 return; | 3738 return; |
| 3752 } | 3739 } |
| 3753 case Intrinsics::AtomicOr: | 3740 case Intrinsics::AtomicOr: |
| 3754 // TODO(jvoung): If Dest is null or dead, then some of these | 3741 // TODO(jvoung): If Dest is null or dead, then some of these |
| 3755 // operations do not need an "exchange", but just a locked op. | 3742 // operations do not need an "exchange", but just a locked op. |
| 3756 // That appears to be "worth" it for sub, or, and, and xor. | 3743 // That appears to be "worth" it for sub, or, and, and xor. |
| 3757 // xadd is probably fine vs lock add for add, and xchg is fine | 3744 // xadd is probably fine vs lock add for add, and xchg is fine |
| 3758 // vs an atomic store. | 3745 // vs an atomic store. |
| 3759 NeedsCmpxchg = true; | 3746 NeedsCmpxchg = true; |
| 3760 Op_Lo = &TargetX86Base<Machine>::_or; | 3747 Op_Lo = &TargetX86Base<TraitsType>::_or; |
| 3761 Op_Hi = &TargetX86Base<Machine>::_or; | 3748 Op_Hi = &TargetX86Base<TraitsType>::_or; |
| 3762 break; | 3749 break; |
| 3763 case Intrinsics::AtomicAnd: | 3750 case Intrinsics::AtomicAnd: |
| 3764 NeedsCmpxchg = true; | 3751 NeedsCmpxchg = true; |
| 3765 Op_Lo = &TargetX86Base<Machine>::_and; | 3752 Op_Lo = &TargetX86Base<TraitsType>::_and; |
| 3766 Op_Hi = &TargetX86Base<Machine>::_and; | 3753 Op_Hi = &TargetX86Base<TraitsType>::_and; |
| 3767 break; | 3754 break; |
| 3768 case Intrinsics::AtomicXor: | 3755 case Intrinsics::AtomicXor: |
| 3769 NeedsCmpxchg = true; | 3756 NeedsCmpxchg = true; |
| 3770 Op_Lo = &TargetX86Base<Machine>::_xor; | 3757 Op_Lo = &TargetX86Base<TraitsType>::_xor; |
| 3771 Op_Hi = &TargetX86Base<Machine>::_xor; | 3758 Op_Hi = &TargetX86Base<TraitsType>::_xor; |
| 3772 break; | 3759 break; |
| 3773 case Intrinsics::AtomicExchange: | 3760 case Intrinsics::AtomicExchange: |
| 3774 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3761 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3775 NeedsCmpxchg = true; | 3762 NeedsCmpxchg = true; |
| 3776 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3763 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 3777 // just need to be moved to the ecx and ebx registers. | 3764 // just need to be moved to the ecx and ebx registers. |
| 3778 Op_Lo = nullptr; | 3765 Op_Lo = nullptr; |
| 3779 Op_Hi = nullptr; | 3766 Op_Hi = nullptr; |
| 3780 break; | 3767 break; |
| 3781 } | 3768 } |
| 3782 typename Traits::X86OperandMem *Addr = | 3769 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3783 formMemoryOperand(Ptr, Dest->getType()); | |
| 3784 Variable *T = nullptr; | 3770 Variable *T = nullptr; |
| 3785 _mov(T, Val); | 3771 _mov(T, Val); |
| 3786 _xchg(Addr, T); | 3772 _xchg(Addr, T); |
| 3787 _mov(Dest, T); | 3773 _mov(Dest, T); |
| 3788 return; | 3774 return; |
| 3789 } | 3775 } |
| 3790 // Otherwise, we need a cmpxchg loop. | 3776 // Otherwise, we need a cmpxchg loop. |
| 3791 (void)NeedsCmpxchg; | 3777 (void)NeedsCmpxchg; |
| 3792 assert(NeedsCmpxchg); | 3778 assert(NeedsCmpxchg); |
| 3793 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | 3779 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
| 3794 } | 3780 } |
| 3795 | 3781 |
| 3796 template <class Machine> | 3782 template <typename TraitsType> |
| 3797 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, | 3783 void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, |
| 3798 LowerBinOp Op_Hi, | 3784 LowerBinOp Op_Hi, |
| 3799 Variable *Dest, | 3785 Variable *Dest, |
| 3800 Operand *Ptr, | 3786 Operand *Ptr, |
| 3801 Operand *Val) { | 3787 Operand *Val) { |
| 3802 // Expand a more complex RMW operation as a cmpxchg loop: | 3788 // Expand a more complex RMW operation as a cmpxchg loop: |
| 3803 // For 64-bit: | 3789 // For 64-bit: |
| 3804 // mov eax, [ptr] | 3790 // mov eax, [ptr] |
| 3805 // mov edx, [ptr + 4] | 3791 // mov edx, [ptr + 4] |
| 3806 // .LABEL: | 3792 // .LABEL: |
| 3807 // mov ebx, eax | 3793 // mov ebx, eax |
| 3808 // <Op_Lo> ebx, <desired_adj_lo> | 3794 // <Op_Lo> ebx, <desired_adj_lo> |
| 3809 // mov ecx, edx | 3795 // mov ecx, edx |
| 3810 // <Op_Hi> ecx, <desired_adj_hi> | 3796 // <Op_Hi> ecx, <desired_adj_hi> |
| 3811 // lock cmpxchg8b [ptr] | 3797 // lock cmpxchg8b [ptr] |
| 3812 // jne .LABEL | 3798 // jne .LABEL |
| 3813 // mov <dest_lo>, eax | 3799 // mov <dest_lo>, eax |
| 3814 // mov <dest_lo>, edx | 3800 // mov <dest_lo>, edx |
| 3815 // | 3801 // |
| 3816 // For 32-bit: | 3802 // For 32-bit: |
| 3817 // mov eax, [ptr] | 3803 // mov eax, [ptr] |
| 3818 // .LABEL: | 3804 // .LABEL: |
| 3819 // mov <reg>, eax | 3805 // mov <reg>, eax |
| 3820 // op <reg>, [desired_adj] | 3806 // op <reg>, [desired_adj] |
| 3821 // lock cmpxchg [ptr], <reg> | 3807 // lock cmpxchg [ptr], <reg> |
| 3822 // jne .LABEL | 3808 // jne .LABEL |
| 3823 // mov <dest>, eax | 3809 // mov <dest>, eax |
| 3824 // | 3810 // |
| 3825 // If Op_{Lo,Hi} are nullptr, then just copy the value. | 3811 // If Op_{Lo,Hi} are nullptr, then just copy the value. |
| 3826 Val = legalize(Val); | 3812 Val = legalize(Val); |
| 3827 Type Ty = Val->getType(); | 3813 Type Ty = Val->getType(); |
| 3828 if (!Traits::Is64Bit && Ty == IceType_i64) { | 3814 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 3829 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3815 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3830 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3816 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3831 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3817 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3832 _mov(T_eax, loOperand(Addr)); | 3818 _mov(T_eax, loOperand(Addr)); |
| 3833 _mov(T_edx, hiOperand(Addr)); | 3819 _mov(T_edx, hiOperand(Addr)); |
| 3834 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3820 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3835 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3821 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3836 typename Traits::Insts::Label *Label = | 3822 InstX86Label *Label = InstX86Label::create(Func, this); |
| 3837 Traits::Insts::Label::create(Func, this); | |
| 3838 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | 3823 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; |
| 3839 if (!IsXchg8b) { | 3824 if (!IsXchg8b) { |
| 3840 Context.insert(Label); | 3825 Context.insert(Label); |
| 3841 _mov(T_ebx, T_eax); | 3826 _mov(T_ebx, T_eax); |
| 3842 (this->*Op_Lo)(T_ebx, loOperand(Val)); | 3827 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
| 3843 _mov(T_ecx, T_edx); | 3828 _mov(T_ecx, T_edx); |
| 3844 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3829 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
| 3845 } else { | 3830 } else { |
| 3846 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3831 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
| 3847 // It just needs the Val loaded into ebx and ecx. | 3832 // It just needs the Val loaded into ebx and ecx. |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 3869 } | 3854 } |
| 3870 // The address base (if any) is also reused in the loop. | 3855 // The address base (if any) is also reused in the loop. |
| 3871 if (Variable *Base = Addr->getBase()) | 3856 if (Variable *Base = Addr->getBase()) |
| 3872 Context.insert<InstFakeUse>(Base); | 3857 Context.insert<InstFakeUse>(Base); |
| 3873 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3858 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3874 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3859 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3875 _mov(DestLo, T_eax); | 3860 _mov(DestLo, T_eax); |
| 3876 _mov(DestHi, T_edx); | 3861 _mov(DestHi, T_edx); |
| 3877 return; | 3862 return; |
| 3878 } | 3863 } |
| 3879 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3864 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3880 int32_t Eax; | 3865 int32_t Eax; |
| 3881 switch (Ty) { | 3866 switch (Ty) { |
| 3882 default: | 3867 default: |
| 3883 llvm::report_fatal_error("Bad type for atomicRMW"); | 3868 llvm::report_fatal_error("Bad type for atomicRMW"); |
| 3884 case IceType_i64: | 3869 case IceType_i64: |
| 3885 Eax = Traits::getRaxOrDie(); | 3870 Eax = Traits::getRaxOrDie(); |
| 3886 break; | 3871 break; |
| 3887 case IceType_i32: | 3872 case IceType_i32: |
| 3888 Eax = Traits::RegisterSet::Reg_eax; | 3873 Eax = Traits::RegisterSet::Reg_eax; |
| 3889 break; | 3874 break; |
| 3890 case IceType_i16: | 3875 case IceType_i16: |
| 3891 Eax = Traits::RegisterSet::Reg_ax; | 3876 Eax = Traits::RegisterSet::Reg_ax; |
| 3892 break; | 3877 break; |
| 3893 case IceType_i8: | 3878 case IceType_i8: |
| 3894 Eax = Traits::RegisterSet::Reg_al; | 3879 Eax = Traits::RegisterSet::Reg_al; |
| 3895 break; | 3880 break; |
| 3896 } | 3881 } |
| 3897 Variable *T_eax = makeReg(Ty, Eax); | 3882 Variable *T_eax = makeReg(Ty, Eax); |
| 3898 _mov(T_eax, Addr); | 3883 _mov(T_eax, Addr); |
| 3899 auto *Label = Context.insert<typename Traits::Insts::Label>(this); | 3884 auto *Label = Context.insert<InstX86Label>(this); |
| 3900 // We want to pick a different register for T than Eax, so don't use | 3885 // We want to pick a different register for T than Eax, so don't use |
| 3901 // _mov(T == nullptr, T_eax). | 3886 // _mov(T == nullptr, T_eax). |
| 3902 Variable *T = makeReg(Ty); | 3887 Variable *T = makeReg(Ty); |
| 3903 _mov(T, T_eax); | 3888 _mov(T, T_eax); |
| 3904 (this->*Op_Lo)(T, Val); | 3889 (this->*Op_Lo)(T, Val); |
| 3905 constexpr bool Locked = true; | 3890 constexpr bool Locked = true; |
| 3906 _cmpxchg(Addr, T_eax, T, Locked); | 3891 _cmpxchg(Addr, T_eax, T, Locked); |
| 3907 _br(Traits::Cond::Br_ne, Label); | 3892 _br(Traits::Cond::Br_ne, Label); |
| 3908 // If Val is a variable, model the extended live range of Val through | 3893 // If Val is a variable, model the extended live range of Val through |
| 3909 // the end of the loop, since it will be re-used by the loop. | 3894 // the end of the loop, since it will be re-used by the loop. |
| 3910 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3895 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3911 Context.insert<InstFakeUse>(ValVar); | 3896 Context.insert<InstFakeUse>(ValVar); |
| 3912 } | 3897 } |
| 3913 // The address base (if any) is also reused in the loop. | 3898 // The address base (if any) is also reused in the loop. |
| 3914 if (Variable *Base = Addr->getBase()) | 3899 if (Variable *Base = Addr->getBase()) |
| 3915 Context.insert<InstFakeUse>(Base); | 3900 Context.insert<InstFakeUse>(Base); |
| 3916 _mov(Dest, T_eax); | 3901 _mov(Dest, T_eax); |
| 3917 } | 3902 } |
| 3918 | 3903 |
| 3919 /// Lowers count {trailing, leading} zeros intrinsic. | 3904 /// Lowers count {trailing, leading} zeros intrinsic. |
| 3920 /// | 3905 /// |
| 3921 /// We could do constant folding here, but that should have | 3906 /// We could do constant folding here, but that should have |
| 3922 /// been done by the front-end/middle-end optimizations. | 3907 /// been done by the front-end/middle-end optimizations. |
| 3923 template <class Machine> | 3908 template <typename TraitsType> |
| 3924 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3909 void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty, |
| 3925 Operand *FirstVal, | 3910 Variable *Dest, |
| 3926 Operand *SecondVal) { | 3911 Operand *FirstVal, |
| 3912 Operand *SecondVal) { | |
| 3927 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3913 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 3928 // Then the instructions will handle the Val == 0 case much more simply | 3914 // Then the instructions will handle the Val == 0 case much more simply |
| 3929 // and won't require conversion from bit position to number of zeros. | 3915 // and won't require conversion from bit position to number of zeros. |
| 3930 // | 3916 // |
| 3931 // Otherwise: | 3917 // Otherwise: |
| 3932 // bsr IF_NOT_ZERO, Val | 3918 // bsr IF_NOT_ZERO, Val |
| 3933 // mov T_DEST, 63 | 3919 // mov T_DEST, 63 |
| 3934 // cmovne T_DEST, IF_NOT_ZERO | 3920 // cmovne T_DEST, IF_NOT_ZERO |
| 3935 // xor T_DEST, 31 | 3921 // xor T_DEST, 31 |
| 3936 // mov DEST, T_DEST | 3922 // mov DEST, T_DEST |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3988 } else { | 3974 } else { |
| 3989 _bsr(T_Dest2, SecondVar); | 3975 _bsr(T_Dest2, SecondVar); |
| 3990 _xor(T_Dest2, _31); | 3976 _xor(T_Dest2, _31); |
| 3991 } | 3977 } |
| 3992 _test(SecondVar, SecondVar); | 3978 _test(SecondVar, SecondVar); |
| 3993 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3979 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3994 _mov(DestLo, T_Dest2); | 3980 _mov(DestLo, T_Dest2); |
| 3995 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3981 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3996 } | 3982 } |
| 3997 | 3983 |
| 3998 template <class Machine> | 3984 template <typename TraitsType> |
| 3999 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, | 3985 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, |
| 4000 Constant *Offset) { | 3986 Variable *Base, Constant *Offset) { |
| 4001 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 3987 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
| 4002 | 3988 |
| 4003 if (isVectorType(Ty)) | 3989 if (isVectorType(Ty)) |
| 4004 _movp(Dest, Mem); | 3990 _movp(Dest, Mem); |
| 4005 else if (Ty == IceType_f64) | 3991 else if (Ty == IceType_f64) |
| 4006 _movq(Dest, Mem); | 3992 _movq(Dest, Mem); |
| 4007 else | 3993 else |
| 4008 _mov(Dest, Mem); | 3994 _mov(Dest, Mem); |
| 4009 } | 3995 } |
| 4010 | 3996 |
| 4011 template <class Machine> | 3997 template <typename TraitsType> |
| 4012 void TargetX86Base<Machine>::typedStore(Type Ty, Variable *Value, | 3998 void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value, |
| 4013 Variable *Base, Constant *Offset) { | 3999 Variable *Base, Constant *Offset) { |
| 4014 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4000 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
| 4015 | 4001 |
| 4016 if (isVectorType(Ty)) | 4002 if (isVectorType(Ty)) |
| 4017 _storep(Value, Mem); | 4003 _storep(Value, Mem); |
| 4018 else if (Ty == IceType_f64) | 4004 else if (Ty == IceType_f64) |
| 4019 _storeq(Value, Mem); | 4005 _storeq(Value, Mem); |
| 4020 else | 4006 else |
| 4021 _store(Value, Mem); | 4007 _store(Value, Mem); |
| 4022 } | 4008 } |
| 4023 | 4009 |
| 4024 template <class Machine> | 4010 template <typename TraitsType> |
| 4025 void TargetX86Base<Machine>::copyMemory(Type Ty, Variable *Dest, Variable *Src, | 4011 void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest, |
| 4026 int32_t OffsetAmt) { | 4012 Variable *Src, int32_t OffsetAmt) { |
| 4027 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; | 4013 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; |
| 4028 // TODO(ascull): this or add nullptr test to _movp, _movq | 4014 // TODO(ascull): this or add nullptr test to _movp, _movq |
| 4029 Variable *Data = makeReg(Ty); | 4015 Variable *Data = makeReg(Ty); |
| 4030 | 4016 |
| 4031 typedLoad(Ty, Data, Src, Offset); | 4017 typedLoad(Ty, Data, Src, Offset); |
| 4032 typedStore(Ty, Data, Dest, Offset); | 4018 typedStore(Ty, Data, Dest, Offset); |
| 4033 } | 4019 } |
| 4034 | 4020 |
| 4035 template <class Machine> | 4021 template <typename TraitsType> |
| 4036 void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src, | 4022 void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src, |
| 4037 Operand *Count) { | 4023 Operand *Count) { |
| 4038 // There is a load and store for each chunk in the unroll | 4024 // There is a load and store for each chunk in the unroll |
| 4039 constexpr uint32_t BytesPerStorep = 16; | 4025 constexpr uint32_t BytesPerStorep = 16; |
| 4040 | 4026 |
| 4041 // Check if the operands are constants | 4027 // Check if the operands are constants |
| 4042 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4028 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
| 4043 const bool IsCountConst = CountConst != nullptr; | 4029 const bool IsCountConst = CountConst != nullptr; |
| 4044 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; | 4030 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
| 4045 | 4031 |
| 4046 if (shouldOptimizeMemIntrins() && IsCountConst && | 4032 if (shouldOptimizeMemIntrins() && IsCountConst && |
| 4047 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) { | 4033 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) { |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4079 } | 4065 } |
| 4080 | 4066 |
| 4081 // Fall back on a function call | 4067 // Fall back on a function call |
| 4082 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); | 4068 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); |
| 4083 Call->addArg(Dest); | 4069 Call->addArg(Dest); |
| 4084 Call->addArg(Src); | 4070 Call->addArg(Src); |
| 4085 Call->addArg(Count); | 4071 Call->addArg(Count); |
| 4086 lowerCall(Call); | 4072 lowerCall(Call); |
| 4087 } | 4073 } |
| 4088 | 4074 |
| 4089 template <class Machine> | 4075 template <typename TraitsType> |
| 4090 void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src, | 4076 void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src, |
| 4091 Operand *Count) { | 4077 Operand *Count) { |
| 4092 // There is a load and store for each chunk in the unroll | 4078 // There is a load and store for each chunk in the unroll |
| 4093 constexpr uint32_t BytesPerStorep = 16; | 4079 constexpr uint32_t BytesPerStorep = 16; |
| 4094 | 4080 |
| 4095 // Check if the operands are constants | 4081 // Check if the operands are constants |
| 4096 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4082 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
| 4097 const bool IsCountConst = CountConst != nullptr; | 4083 const bool IsCountConst = CountConst != nullptr; |
| 4098 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; | 4084 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
| 4099 | 4085 |
| 4100 if (shouldOptimizeMemIntrins() && IsCountConst && | 4086 if (shouldOptimizeMemIntrins() && IsCountConst && |
| 4101 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) { | 4087 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) { |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4151 } | 4137 } |
| 4152 | 4138 |
| 4153 // Fall back on a function call | 4139 // Fall back on a function call |
| 4154 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | 4140 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); |
| 4155 Call->addArg(Dest); | 4141 Call->addArg(Dest); |
| 4156 Call->addArg(Src); | 4142 Call->addArg(Src); |
| 4157 Call->addArg(Count); | 4143 Call->addArg(Count); |
| 4158 lowerCall(Call); | 4144 lowerCall(Call); |
| 4159 } | 4145 } |
| 4160 | 4146 |
| 4161 template <class Machine> | 4147 template <typename TraitsType> |
| 4162 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, | 4148 void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val, |
| 4163 Operand *Count) { | 4149 Operand *Count) { |
| 4164 constexpr uint32_t BytesPerStorep = 16; | 4150 constexpr uint32_t BytesPerStorep = 16; |
| 4165 constexpr uint32_t BytesPerStoreq = 8; | 4151 constexpr uint32_t BytesPerStoreq = 8; |
| 4166 constexpr uint32_t BytesPerStorei32 = 4; | 4152 constexpr uint32_t BytesPerStorei32 = 4; |
| 4167 assert(Val->getType() == IceType_i8); | 4153 assert(Val->getType() == IceType_i8); |
| 4168 | 4154 |
| 4169 // Check if the operands are constants | 4155 // Check if the operands are constants |
| 4170 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4156 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
| 4171 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); | 4157 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); |
| 4172 const bool IsCountConst = CountConst != nullptr; | 4158 const bool IsCountConst = CountConst != nullptr; |
| 4173 const bool IsValConst = ValConst != nullptr; | 4159 const bool IsValConst = ValConst != nullptr; |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 4186 Variable *VecReg = nullptr; | 4172 Variable *VecReg = nullptr; |
| 4187 const uint32_t SpreadValue = | 4173 const uint32_t SpreadValue = |
| 4188 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; | 4174 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; |
| 4189 | 4175 |
| 4190 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty, | 4176 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty, |
| 4191 uint32_t OffsetAmt) { | 4177 uint32_t OffsetAmt) { |
| 4192 assert(Base != nullptr); | 4178 assert(Base != nullptr); |
| 4193 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; | 4179 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; |
| 4194 | 4180 |
| 4195 // TODO(ascull): is 64-bit better with vector or scalar movq? | 4181 // TODO(ascull): is 64-bit better with vector or scalar movq? |
| 4196 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4182 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
| 4197 if (isVectorType(Ty)) { | 4183 if (isVectorType(Ty)) { |
| 4198 assert(VecReg != nullptr); | 4184 assert(VecReg != nullptr); |
| 4199 _storep(VecReg, Mem); | 4185 _storep(VecReg, Mem); |
| 4200 } else if (Ty == IceType_f64) { | 4186 } else if (Ty == IceType_f64) { |
| 4201 assert(VecReg != nullptr); | 4187 assert(VecReg != nullptr); |
| 4202 _storeq(VecReg, Mem); | 4188 _storeq(VecReg, Mem); |
| 4203 } else { | 4189 } else { |
| 4204 assert(Ty != IceType_i64); | 4190 assert(Ty != IceType_i64); |
| 4205 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); | 4191 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
| 4206 } | 4192 } |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4260 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); | 4246 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); |
| 4261 ValExt = ValExtVar; | 4247 ValExt = ValExtVar; |
| 4262 } | 4248 } |
| 4263 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | 4249 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
| 4264 Call->addArg(Dest); | 4250 Call->addArg(Dest); |
| 4265 Call->addArg(ValExt); | 4251 Call->addArg(ValExt); |
| 4266 Call->addArg(Count); | 4252 Call->addArg(Count); |
| 4267 lowerCall(Call); | 4253 lowerCall(Call); |
| 4268 } | 4254 } |
| 4269 | 4255 |
| 4270 template <class Machine> | 4256 template <typename TraitsType> |
| 4271 void TargetX86Base<Machine>::lowerIndirectJump(Variable *JumpTarget) { | 4257 void TargetX86Base<TraitsType>::lowerIndirectJump(Variable *JumpTarget) { |
| 4272 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 4258 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 4273 if (Traits::Is64Bit) { | 4259 if (Traits::Is64Bit) { |
| 4274 Variable *T = makeReg(IceType_i64); | 4260 Variable *T = makeReg(IceType_i64); |
| 4275 _movzx(T, JumpTarget); | 4261 _movzx(T, JumpTarget); |
| 4276 JumpTarget = T; | 4262 JumpTarget = T; |
| 4277 } | 4263 } |
| 4278 if (NeedSandboxing) { | 4264 if (NeedSandboxing) { |
| 4279 _bundle_lock(); | 4265 _bundle_lock(); |
| 4280 const SizeT BundleSize = | 4266 const SizeT BundleSize = |
| 4281 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 4267 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| (...skipping 376 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4658 /// For the purpose of mocking the bounds check, we'll do something like this: | 4644 /// For the purpose of mocking the bounds check, we'll do something like this: |
| 4659 /// | 4645 /// |
| 4660 /// cmp reg, 0 | 4646 /// cmp reg, 0 |
| 4661 /// je label | 4647 /// je label |
| 4662 /// cmp reg, 1 | 4648 /// cmp reg, 1 |
| 4663 /// je label | 4649 /// je label |
| 4664 /// label: | 4650 /// label: |
| 4665 /// | 4651 /// |
| 4666 /// Also note that we don't need to add a bounds check to a dereference of a | 4652 /// Also note that we don't need to add a bounds check to a dereference of a |
| 4667 /// simple global variable address. | 4653 /// simple global variable address. |
| 4668 template <class Machine> | 4654 template <typename TraitsType> |
| 4669 void TargetX86Base<Machine>::doMockBoundsCheck(Operand *Opnd) { | 4655 void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) { |
| 4670 if (!Ctx->getFlags().getMockBoundsCheck()) | 4656 if (!Ctx->getFlags().getMockBoundsCheck()) |
| 4671 return; | 4657 return; |
| 4672 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd)) { | 4658 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) { |
| 4673 if (Mem->getIndex()) { | 4659 if (Mem->getIndex()) { |
| 4674 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg"); | 4660 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg"); |
| 4675 } | 4661 } |
| 4676 Opnd = Mem->getBase(); | 4662 Opnd = Mem->getBase(); |
| 4677 } | 4663 } |
| 4678 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps | 4664 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps |
| 4679 // something else. We only care if it is Variable. | 4665 // something else. We only care if it is Variable. |
| 4680 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 4666 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
| 4681 if (Var == nullptr) | 4667 if (Var == nullptr) |
| 4682 return; | 4668 return; |
| 4683 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 4669 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
| 4684 // operand with the stack pointer as the base register. Don't do bounds | 4670 // operand with the stack pointer as the base register. Don't do bounds |
| 4685 // checks on that. | 4671 // checks on that. |
| 4686 if (Var->getRegNum() == static_cast<int32_t>(getStackReg())) | 4672 if (Var->getRegNum() == static_cast<int32_t>(getStackReg())) |
| 4687 return; | 4673 return; |
| 4688 | 4674 |
| 4689 auto *Label = Traits::Insts::Label::create(Func, this); | 4675 auto *Label = InstX86Label::create(Func, this); |
| 4690 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 4676 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); |
| 4691 _br(Traits::Cond::Br_e, Label); | 4677 _br(Traits::Cond::Br_e, Label); |
| 4692 _cmp(Opnd, Ctx->getConstantInt32(1)); | 4678 _cmp(Opnd, Ctx->getConstantInt32(1)); |
| 4693 _br(Traits::Cond::Br_e, Label); | 4679 _br(Traits::Cond::Br_e, Label); |
| 4694 Context.insert(Label); | 4680 Context.insert(Label); |
| 4695 } | 4681 } |
| 4696 | 4682 |
| 4697 template <class Machine> | 4683 template <typename TraitsType> |
| 4698 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { | 4684 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { |
| 4699 // A Load instruction can be treated the same as an Assign instruction, after | 4685 // A Load instruction can be treated the same as an Assign instruction, after |
| 4700 // the source operand is transformed into an Traits::X86OperandMem operand. | 4686 // the source operand is transformed into an X86OperandMem operand. |
| 4701 // Note that the address mode optimization already creates an | 4687 // Note that the address mode optimization already creates an |
| 4702 // Traits::X86OperandMem operand, so it doesn't need another level of | 4688 // X86OperandMem operand, so it doesn't need another level of |
| 4703 // transformation. | 4689 // transformation. |
| 4704 Variable *DestLoad = Load->getDest(); | 4690 Variable *DestLoad = Load->getDest(); |
| 4705 Type Ty = DestLoad->getType(); | 4691 Type Ty = DestLoad->getType(); |
| 4706 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 4692 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 4707 doMockBoundsCheck(Src0); | 4693 doMockBoundsCheck(Src0); |
| 4708 auto *Assign = InstAssign::create(Func, DestLoad, Src0); | 4694 auto *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 4709 lowerAssign(Assign); | 4695 lowerAssign(Assign); |
| 4710 } | 4696 } |
| 4711 | 4697 |
| 4712 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { | 4698 template <typename TraitsType> |
| 4699 void TargetX86Base<TraitsType>::doAddressOptLoad() { | |
| 4713 Inst *Inst = Context.getCur(); | 4700 Inst *Inst = Context.getCur(); |
| 4714 Variable *Dest = Inst->getDest(); | 4701 Variable *Dest = Inst->getDest(); |
| 4715 Operand *Addr = Inst->getSrc(0); | 4702 Operand *Addr = Inst->getSrc(0); |
| 4716 Variable *Index = nullptr; | 4703 Variable *Index = nullptr; |
| 4717 ConstantRelocatable *Relocatable = nullptr; | 4704 ConstantRelocatable *Relocatable = nullptr; |
| 4718 uint16_t Shift = 0; | 4705 uint16_t Shift = 0; |
| 4719 int32_t Offset = 0; | 4706 int32_t Offset = 0; |
| 4720 // Vanilla ICE load instructions should not use the segment registers, and | 4707 // Vanilla ICE load instructions should not use the segment registers, and |
| 4721 // computeAddressOpt only works at the level of Variables and Constants, not | 4708 // computeAddressOpt only works at the level of Variables and Constants, not |
| 4722 // other Traits::X86OperandMem, so there should be no mention of segment | 4709 // other X86OperandMem, so there should be no mention of segment |
| 4723 // registers there either. | 4710 // registers there either. |
| 4724 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = | 4711 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment; |
| 4725 Traits::X86OperandMem::DefaultSegment; | |
| 4726 auto *Base = llvm::dyn_cast<Variable>(Addr); | 4712 auto *Base = llvm::dyn_cast<Variable>(Addr); |
| 4727 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { | 4713 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
| 4728 Inst->setDeleted(); | 4714 Inst->setDeleted(); |
| 4729 Constant *OffsetOp = nullptr; | 4715 Constant *OffsetOp = nullptr; |
| 4730 if (Relocatable == nullptr) { | 4716 if (Relocatable == nullptr) { |
| 4731 OffsetOp = Ctx->getConstantInt32(Offset); | 4717 OffsetOp = Ctx->getConstantInt32(Offset); |
| 4732 } else { | 4718 } else { |
| 4733 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 4719 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| 4734 Relocatable->getName(), | 4720 Relocatable->getName(), |
| 4735 Relocatable->getSuppressMangling()); | 4721 Relocatable->getSuppressMangling()); |
| 4736 } | 4722 } |
| 4737 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, | 4723 Addr = X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
| 4738 Index, Shift, SegmentReg); | 4724 Shift, SegmentReg); |
| 4739 Context.insert<InstLoad>(Dest, Addr); | 4725 Context.insert<InstLoad>(Dest, Addr); |
| 4740 } | 4726 } |
| 4741 } | 4727 } |
| 4742 | 4728 |
| 4743 template <class Machine> | 4729 template <typename TraitsType> |
| 4744 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, | 4730 void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability, |
| 4745 RandomNumberGenerator &RNG) { | 4731 RandomNumberGenerator &RNG) { |
| 4746 RandomNumberGeneratorWrapper RNGW(RNG); | 4732 RandomNumberGeneratorWrapper RNGW(RNG); |
| 4747 if (RNGW.getTrueWithProbability(Probability)) { | 4733 if (RNGW.getTrueWithProbability(Probability)) { |
| 4748 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 4734 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
| 4749 } | 4735 } |
| 4750 } | 4736 } |
| 4751 | 4737 |
| 4752 template <class Machine> | 4738 template <typename TraitsType> |
| 4753 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | 4739 void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) { |
| 4754 Func->setError("Phi found in regular instruction list"); | 4740 Func->setError("Phi found in regular instruction list"); |
| 4755 } | 4741 } |
| 4756 | 4742 |
| 4757 template <class Machine> | 4743 template <typename TraitsType> |
| 4758 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) { | 4744 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { |
| 4759 Variable *Dest = Select->getDest(); | 4745 Variable *Dest = Select->getDest(); |
| 4760 | 4746 |
| 4761 if (isVectorType(Dest->getType())) { | 4747 if (isVectorType(Dest->getType())) { |
| 4762 lowerSelectVector(Select); | 4748 lowerSelectVector(Select); |
| 4763 return; | 4749 return; |
| 4764 } | 4750 } |
| 4765 | 4751 |
| 4766 Operand *Condition = Select->getCondition(); | 4752 Operand *Condition = Select->getCondition(); |
| 4767 // Handle folding opportunities. | 4753 // Handle folding opportunities. |
| 4768 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4754 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 4780 return; | 4766 return; |
| 4781 } | 4767 } |
| 4782 } | 4768 } |
| 4783 } | 4769 } |
| 4784 | 4770 |
| 4785 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); | 4771 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4786 Operand *Zero = Ctx->getConstantZero(IceType_i32); | 4772 Operand *Zero = Ctx->getConstantZero(IceType_i32); |
| 4787 _cmp(CmpResult, Zero); | 4773 _cmp(CmpResult, Zero); |
| 4788 Operand *SrcT = Select->getTrueOperand(); | 4774 Operand *SrcT = Select->getTrueOperand(); |
| 4789 Operand *SrcF = Select->getFalseOperand(); | 4775 Operand *SrcF = Select->getFalseOperand(); |
| 4790 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; | 4776 const BrCond Cond = Traits::Cond::Br_ne; |
| 4791 lowerSelectMove(Dest, Cond, SrcT, SrcF); | 4777 lowerSelectMove(Dest, Cond, SrcT, SrcF); |
| 4792 } | 4778 } |
| 4793 | 4779 |
| 4794 template <class Machine> | 4780 template <typename TraitsType> |
| 4795 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, | 4781 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, |
| 4796 typename Traits::Cond::BrCond Cond, | 4782 Operand *SrcT, Operand *SrcF) { |
| 4797 Operand *SrcT, Operand *SrcF) { | |
| 4798 Type DestTy = Dest->getType(); | 4783 Type DestTy = Dest->getType(); |
| 4799 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4784 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
| 4800 // The cmov instruction doesn't allow 8-bit or FP operands, so we need | 4785 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
| 4801 // explicit control flow. | 4786 // explicit control flow. |
| 4802 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4787 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
| 4803 auto *Label = Traits::Insts::Label::create(Func, this); | 4788 auto *Label = InstX86Label::create(Func, this); |
| 4804 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4789 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
| 4805 _mov(Dest, SrcT); | 4790 _mov(Dest, SrcT); |
| 4806 _br(Cond, Label); | 4791 _br(Cond, Label); |
| 4807 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4792 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
| 4808 _redefined(_mov(Dest, SrcF)); | 4793 _redefined(_mov(Dest, SrcF)); |
| 4809 Context.insert(Label); | 4794 Context.insert(Label); |
| 4810 return; | 4795 return; |
| 4811 } | 4796 } |
| 4812 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4797 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| 4813 // But if SrcT is immediate, we might be able to do better, as the cmov | 4798 // But if SrcT is immediate, we might be able to do better, as the cmov |
| 4814 // instruction doesn't allow an immediate operand: | 4799 // instruction doesn't allow an immediate operand: |
| 4815 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4800 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
| 4816 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4801 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
| 4817 std::swap(SrcT, SrcF); | 4802 std::swap(SrcT, SrcF); |
| 4818 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4803 Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond); |
| 4819 } | 4804 } |
| 4820 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 4805 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 4821 SrcT = legalizeUndef(SrcT); | 4806 SrcT = legalizeUndef(SrcT); |
| 4822 SrcF = legalizeUndef(SrcF); | 4807 SrcF = legalizeUndef(SrcF); |
| 4823 // Set the low portion. | 4808 // Set the low portion. |
| 4824 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4809 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4825 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); | 4810 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); |
| 4826 // Set the high portion. | 4811 // Set the high portion. |
| 4827 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4812 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4828 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); | 4813 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); |
| 4829 return; | 4814 return; |
| 4830 } | 4815 } |
| 4831 | 4816 |
| 4832 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || | 4817 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
| 4833 (Traits::Is64Bit && DestTy == IceType_i64)); | 4818 (Traits::Is64Bit && DestTy == IceType_i64)); |
| 4834 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); | 4819 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); |
| 4835 } | 4820 } |
| 4836 | 4821 |
| 4837 template <class Machine> | 4822 template <typename TraitsType> |
| 4838 void TargetX86Base<Machine>::lowerSelectIntMove( | 4823 void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond, |
| 4839 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT, | 4824 Operand *SrcT, |
| 4840 Operand *SrcF) { | 4825 Operand *SrcF) { |
| 4841 Variable *T = nullptr; | 4826 Variable *T = nullptr; |
| 4842 SrcF = legalize(SrcF); | 4827 SrcF = legalize(SrcF); |
| 4843 _mov(T, SrcF); | 4828 _mov(T, SrcF); |
| 4844 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4829 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4845 _cmov(T, SrcT, Cond); | 4830 _cmov(T, SrcT, Cond); |
| 4846 _mov(Dest, T); | 4831 _mov(Dest, T); |
| 4847 } | 4832 } |
| 4848 | 4833 |
| 4849 template <class Machine> | 4834 template <typename TraitsType> |
| 4850 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src, | 4835 void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src, |
| 4851 bool IsRedefinition) { | 4836 bool IsRedefinition) { |
| 4852 assert(Dest->getType() == Src->getType()); | 4837 assert(Dest->getType() == Src->getType()); |
| 4853 assert(!Dest->isRematerializable()); | 4838 assert(!Dest->isRematerializable()); |
| 4854 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 4839 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 4855 Src = legalize(Src); | 4840 Src = legalize(Src); |
| 4856 Operand *SrcLo = loOperand(Src); | 4841 Operand *SrcLo = loOperand(Src); |
| 4857 Operand *SrcHi = hiOperand(Src); | 4842 Operand *SrcHi = hiOperand(Src); |
| 4858 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4843 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4859 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4844 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4860 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 4845 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 4861 _mov(T_Lo, SrcLo); | 4846 _mov(T_Lo, SrcLo); |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 4875 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); | 4860 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); |
| 4876 } | 4861 } |
| 4877 if (isVectorType(Dest->getType())) { | 4862 if (isVectorType(Dest->getType())) { |
| 4878 _redefined(_movp(Dest, SrcLegal), IsRedefinition); | 4863 _redefined(_movp(Dest, SrcLegal), IsRedefinition); |
| 4879 } else { | 4864 } else { |
| 4880 _redefined(_mov(Dest, SrcLegal), IsRedefinition); | 4865 _redefined(_mov(Dest, SrcLegal), IsRedefinition); |
| 4881 } | 4866 } |
| 4882 } | 4867 } |
| 4883 } | 4868 } |
| 4884 | 4869 |
| 4885 template <class Machine> | 4870 template <typename TraitsType> |
| 4886 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, | 4871 bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect( |
| 4887 const InstSelect *Select) { | 4872 const InstFcmp *Fcmp, const InstSelect *Select) { |
| 4888 Operand *CmpSrc0 = Fcmp->getSrc(0); | 4873 Operand *CmpSrc0 = Fcmp->getSrc(0); |
| 4889 Operand *CmpSrc1 = Fcmp->getSrc(1); | 4874 Operand *CmpSrc1 = Fcmp->getSrc(1); |
| 4890 Operand *SelectSrcT = Select->getTrueOperand(); | 4875 Operand *SelectSrcT = Select->getTrueOperand(); |
| 4891 Operand *SelectSrcF = Select->getFalseOperand(); | 4876 Operand *SelectSrcF = Select->getFalseOperand(); |
| 4892 | 4877 |
| 4893 if (CmpSrc0->getType() != SelectSrcT->getType()) | 4878 if (CmpSrc0->getType() != SelectSrcT->getType()) |
| 4894 return false; | 4879 return false; |
| 4895 | 4880 |
| 4896 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. | 4881 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. |
| 4897 InstFcmp::FCond Condition = Fcmp->getCondition(); | 4882 InstFcmp::FCond Condition = Fcmp->getCondition(); |
| 4898 switch (Condition) { | 4883 switch (Condition) { |
| 4899 default: | 4884 default: |
| 4900 return false; | 4885 return false; |
| 4901 case InstFcmp::True: | 4886 case InstFcmp::True: |
| 4902 case InstFcmp::False: | 4887 case InstFcmp::False: |
| 4903 case InstFcmp::Ogt: | 4888 case InstFcmp::Ogt: |
| 4904 case InstFcmp::Olt: | 4889 case InstFcmp::Olt: |
| 4905 (void)CmpSrc0; | 4890 (void)CmpSrc0; |
| 4906 (void)CmpSrc1; | 4891 (void)CmpSrc1; |
| 4907 (void)SelectSrcT; | 4892 (void)SelectSrcT; |
| 4908 (void)SelectSrcF; | 4893 (void)SelectSrcF; |
| 4909 break; | 4894 break; |
| 4910 } | 4895 } |
| 4911 return false; | 4896 return false; |
| 4912 } | 4897 } |
| 4913 | 4898 |
| 4914 template <class Machine> | 4899 template <typename TraitsType> |
| 4915 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) { | 4900 void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) { |
| 4916 Variable *Dest = Icmp->getDest(); | 4901 Variable *Dest = Icmp->getDest(); |
| 4917 if (isVectorType(Dest->getType())) { | 4902 if (isVectorType(Dest->getType())) { |
| 4918 lowerIcmpVector(Icmp); | 4903 lowerIcmpVector(Icmp); |
| 4919 } else { | 4904 } else { |
| 4920 constexpr Inst *Consumer = nullptr; | 4905 constexpr Inst *Consumer = nullptr; |
| 4921 lowerIcmpAndConsumer(Icmp, Consumer); | 4906 lowerIcmpAndConsumer(Icmp, Consumer); |
| 4922 } | 4907 } |
| 4923 } | 4908 } |
| 4924 | 4909 |
| 4925 template <class Machine> | 4910 template <typename TraitsType> |
| 4926 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) { | 4911 void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Inst) { |
| 4927 Variable *Dest = Inst->getDest(); | 4912 Variable *Dest = Inst->getDest(); |
| 4928 Type DestTy = Dest->getType(); | 4913 Type DestTy = Dest->getType(); |
| 4929 Operand *SrcT = Inst->getTrueOperand(); | 4914 Operand *SrcT = Inst->getTrueOperand(); |
| 4930 Operand *SrcF = Inst->getFalseOperand(); | 4915 Operand *SrcF = Inst->getFalseOperand(); |
| 4931 Operand *Condition = Inst->getCondition(); | 4916 Operand *Condition = Inst->getCondition(); |
| 4932 | 4917 |
| 4933 if (!isVectorType(DestTy)) | 4918 if (!isVectorType(DestTy)) |
| 4934 llvm::report_fatal_error("Expected a vector select"); | 4919 llvm::report_fatal_error("Expected a vector select"); |
| 4935 | 4920 |
| 4936 Type SrcTy = SrcT->getType(); | 4921 Type SrcTy = SrcT->getType(); |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4983 } | 4968 } |
| 4984 _movp(T2, T); | 4969 _movp(T2, T); |
| 4985 _pand(T, SrcTRM); | 4970 _pand(T, SrcTRM); |
| 4986 _pandn(T2, SrcFRM); | 4971 _pandn(T2, SrcFRM); |
| 4987 _por(T, T2); | 4972 _por(T, T2); |
| 4988 _movp(Dest, T); | 4973 _movp(Dest, T); |
| 4989 | 4974 |
| 4990 return; | 4975 return; |
| 4991 } | 4976 } |
| 4992 | 4977 |
| 4993 template <class Machine> | 4978 template <typename TraitsType> |
| 4994 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | 4979 void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) { |
| 4995 Operand *Value = Inst->getData(); | 4980 Operand *Value = Inst->getData(); |
| 4996 Operand *Addr = Inst->getAddr(); | 4981 Operand *Addr = Inst->getAddr(); |
| 4997 typename Traits::X86OperandMem *NewAddr = | 4982 X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
| 4998 formMemoryOperand(Addr, Value->getType()); | |
| 4999 doMockBoundsCheck(NewAddr); | 4983 doMockBoundsCheck(NewAddr); |
| 5000 Type Ty = NewAddr->getType(); | 4984 Type Ty = NewAddr->getType(); |
| 5001 | 4985 |
| 5002 if (!Traits::Is64Bit && Ty == IceType_i64) { | 4986 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 5003 Value = legalizeUndef(Value); | 4987 Value = legalizeUndef(Value); |
| 5004 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4988 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
| 5005 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4989 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
| 5006 _store(ValueHi, | 4990 _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr))); |
| 5007 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); | 4991 _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr))); |
| 5008 _store(ValueLo, | |
| 5009 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); | |
| 5010 } else if (isVectorType(Ty)) { | 4992 } else if (isVectorType(Ty)) { |
| 5011 _storep(legalizeToReg(Value), NewAddr); | 4993 _storep(legalizeToReg(Value), NewAddr); |
| 5012 } else { | 4994 } else { |
| 5013 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4995 Value = legalize(Value, Legal_Reg | Legal_Imm); |
| 5014 _store(Value, NewAddr); | 4996 _store(Value, NewAddr); |
| 5015 } | 4997 } |
| 5016 } | 4998 } |
| 5017 | 4999 |
| 5018 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { | 5000 template <typename TraitsType> |
| 5001 void TargetX86Base<TraitsType>::doAddressOptStore() { | |
| 5019 auto *Inst = llvm::cast<InstStore>(Context.getCur()); | 5002 auto *Inst = llvm::cast<InstStore>(Context.getCur()); |
| 5020 Operand *Data = Inst->getData(); | 5003 Operand *Data = Inst->getData(); |
| 5021 Operand *Addr = Inst->getAddr(); | 5004 Operand *Addr = Inst->getAddr(); |
| 5022 Variable *Index = nullptr; | 5005 Variable *Index = nullptr; |
| 5023 ConstantRelocatable *Relocatable = nullptr; | 5006 ConstantRelocatable *Relocatable = nullptr; |
| 5024 uint16_t Shift = 0; | 5007 uint16_t Shift = 0; |
| 5025 int32_t Offset = 0; | 5008 int32_t Offset = 0; |
| 5026 auto *Base = llvm::dyn_cast<Variable>(Addr); | 5009 auto *Base = llvm::dyn_cast<Variable>(Addr); |
| 5027 // Vanilla ICE store instructions should not use the segment registers, and | 5010 // Vanilla ICE store instructions should not use the segment registers, and |
| 5028 // computeAddressOpt only works at the level of Variables and Constants, not | 5011 // computeAddressOpt only works at the level of Variables and Constants, not |
| 5029 // other Traits::X86OperandMem, so there should be no mention of segment | 5012 // other X86OperandMem, so there should be no mention of segment |
| 5030 // registers there either. | 5013 // registers there either. |
| 5031 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = | 5014 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment; |
| 5032 Traits::X86OperandMem::DefaultSegment; | |
| 5033 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { | 5015 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
| 5034 Inst->setDeleted(); | 5016 Inst->setDeleted(); |
| 5035 Constant *OffsetOp = nullptr; | 5017 Constant *OffsetOp = nullptr; |
| 5036 if (Relocatable == nullptr) { | 5018 if (Relocatable == nullptr) { |
| 5037 OffsetOp = Ctx->getConstantInt32(Offset); | 5019 OffsetOp = Ctx->getConstantInt32(Offset); |
| 5038 } else { | 5020 } else { |
| 5039 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 5021 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
| 5040 Relocatable->getName(), | 5022 Relocatable->getName(), |
| 5041 Relocatable->getSuppressMangling()); | 5023 Relocatable->getSuppressMangling()); |
| 5042 } | 5024 } |
| 5043 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, | 5025 Addr = X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 5044 Index, Shift, SegmentReg); | 5026 Shift, SegmentReg); |
| 5045 auto *NewStore = Context.insert<InstStore>(Data, Addr); | 5027 auto *NewStore = Context.insert<InstStore>(Data, Addr); |
| 5046 if (Inst->getDest()) | 5028 if (Inst->getDest()) |
| 5047 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 5029 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 5048 } | 5030 } |
| 5049 } | 5031 } |
| 5050 | 5032 |
| 5051 template <class Machine> | 5033 template <typename TraitsType> |
| 5052 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 5034 Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison, |
| 5053 uint64_t Min, uint64_t Max) { | 5035 uint64_t Min, uint64_t Max) { |
| 5054 // TODO(ascull): 64-bit should not reach here but only because it is not | 5036 // TODO(ascull): 64-bit should not reach here but only because it is not |
| 5055 // implemented yet. This should be able to handle the 64-bit case. | 5037 // implemented yet. This should be able to handle the 64-bit case. |
| 5056 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); | 5038 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
| 5057 // Subtracting 0 is a nop so don't do it | 5039 // Subtracting 0 is a nop so don't do it |
| 5058 if (Min != 0) { | 5040 if (Min != 0) { |
| 5059 // Avoid clobbering the comparison by copying it | 5041 // Avoid clobbering the comparison by copying it |
| 5060 Variable *T = nullptr; | 5042 Variable *T = nullptr; |
| 5061 _mov(T, Comparison); | 5043 _mov(T, Comparison); |
| 5062 _sub(T, Ctx->getConstantInt32(Min)); | 5044 _sub(T, Ctx->getConstantInt32(Min)); |
| 5063 Comparison = T; | 5045 Comparison = T; |
| 5064 } | 5046 } |
| 5065 | 5047 |
| 5066 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 5048 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| 5067 | 5049 |
| 5068 return Comparison; | 5050 return Comparison; |
| 5069 } | 5051 } |
| 5070 | 5052 |
| 5071 template <class Machine> | 5053 template <typename TraitsType> |
| 5072 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, | 5054 void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case, |
| 5073 Operand *Comparison, bool DoneCmp, | 5055 Operand *Comparison, |
| 5074 CfgNode *DefaultTarget) { | 5056 bool DoneCmp, |
| 5057 CfgNode *DefaultTarget) { | |
| 5075 switch (Case.getKind()) { | 5058 switch (Case.getKind()) { |
| 5076 case CaseCluster::JumpTable: { | 5059 case CaseCluster::JumpTable: { |
| 5077 typename Traits::Insts::Label *SkipJumpTable; | 5060 InstX86Label *SkipJumpTable; |
| 5078 | 5061 |
| 5079 Operand *RangeIndex = | 5062 Operand *RangeIndex = |
| 5080 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 5063 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 5081 if (DefaultTarget == nullptr) { | 5064 if (DefaultTarget == nullptr) { |
| 5082 // Skip over jump table logic if comparison not in range and no default | 5065 // Skip over jump table logic if comparison not in range and no default |
| 5083 SkipJumpTable = Traits::Insts::Label::create(Func, this); | 5066 SkipJumpTable = InstX86Label::create(Func, this); |
| 5084 _br(Traits::Cond::Br_a, SkipJumpTable); | 5067 _br(Traits::Cond::Br_a, SkipJumpTable); |
| 5085 } else { | 5068 } else { |
| 5086 _br(Traits::Cond::Br_a, DefaultTarget); | 5069 _br(Traits::Cond::Br_a, DefaultTarget); |
| 5087 } | 5070 } |
| 5088 | 5071 |
| 5089 InstJumpTable *JumpTable = Case.getJumpTable(); | 5072 InstJumpTable *JumpTable = Case.getJumpTable(); |
| 5090 Context.insert(JumpTable); | 5073 Context.insert(JumpTable); |
| 5091 | 5074 |
| 5092 // Make sure the index is a register of the same width as the base | 5075 // Make sure the index is a register of the same width as the base |
| 5093 Variable *Index; | 5076 Variable *Index; |
| 5094 if (RangeIndex->getType() != getPointerType()) { | 5077 if (RangeIndex->getType() != getPointerType()) { |
| 5095 Index = makeReg(getPointerType()); | 5078 Index = makeReg(getPointerType()); |
| 5096 _movzx(Index, RangeIndex); | 5079 _movzx(Index, RangeIndex); |
| 5097 } else { | 5080 } else { |
| 5098 Index = legalizeToReg(RangeIndex); | 5081 Index = legalizeToReg(RangeIndex); |
| 5099 } | 5082 } |
| 5100 | 5083 |
| 5101 constexpr RelocOffsetT RelocOffset = 0; | 5084 constexpr RelocOffsetT RelocOffset = 0; |
| 5102 constexpr bool SuppressMangling = true; | 5085 constexpr bool SuppressMangling = true; |
| 5103 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); | 5086 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| 5104 Constant *Base = Ctx->getConstantSym( | 5087 Constant *Base = Ctx->getConstantSym( |
| 5105 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), | 5088 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), |
| 5106 SuppressMangling); | 5089 SuppressMangling); |
| 5107 Constant *Offset = nullptr; | 5090 Constant *Offset = nullptr; |
| 5108 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); | 5091 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); |
| 5109 // TODO(ascull): remove need for legalize by allowing null base in memop | 5092 // TODO(ascull): remove need for legalize by allowing null base in memop |
| 5110 auto *TargetInMemory = Traits::X86OperandMem::create( | 5093 auto *TargetInMemory = X86OperandMem::create( |
| 5111 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); | 5094 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); |
| 5112 Variable *Target = nullptr; | 5095 Variable *Target = nullptr; |
| 5113 _mov(Target, TargetInMemory); | 5096 _mov(Target, TargetInMemory); |
| 5114 lowerIndirectJump(Target); | 5097 lowerIndirectJump(Target); |
| 5115 | 5098 |
| 5116 if (DefaultTarget == nullptr) | 5099 if (DefaultTarget == nullptr) |
| 5117 Context.insert(SkipJumpTable); | 5100 Context.insert(SkipJumpTable); |
| 5118 return; | 5101 return; |
| 5119 } | 5102 } |
| 5120 case CaseCluster::Range: { | 5103 case CaseCluster::Range: { |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 5136 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 5119 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 5137 _br(Traits::Cond::Br_be, Case.getTarget()); | 5120 _br(Traits::Cond::Br_be, Case.getTarget()); |
| 5138 } | 5121 } |
| 5139 if (DefaultTarget != nullptr) | 5122 if (DefaultTarget != nullptr) |
| 5140 _br(DefaultTarget); | 5123 _br(DefaultTarget); |
| 5141 return; | 5124 return; |
| 5142 } | 5125 } |
| 5143 } | 5126 } |
| 5144 } | 5127 } |
| 5145 | 5128 |
| 5146 template <class Machine> | 5129 template <typename TraitsType> |
| 5147 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 5130 void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Inst) { |
| 5148 // Group cases together and navigate through them with a binary search | 5131 // Group cases together and navigate through them with a binary search |
| 5149 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 5132 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 5150 Operand *Src0 = Inst->getComparison(); | 5133 Operand *Src0 = Inst->getComparison(); |
| 5151 CfgNode *DefaultTarget = Inst->getLabelDefault(); | 5134 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 5152 | 5135 |
| 5153 assert(CaseClusters.size() != 0); // Should always be at least one | 5136 assert(CaseClusters.size() != 0); // Should always be at least one |
| 5154 | 5137 |
| 5155 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { | 5138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 5156 Src0 = legalize(Src0); // get Base/Index into physical registers | 5139 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 5157 Operand *Src0Lo = loOperand(Src0); | 5140 Operand *Src0Lo = loOperand(Src0); |
| 5158 Operand *Src0Hi = hiOperand(Src0); | 5141 Operand *Src0Hi = hiOperand(Src0); |
| 5159 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 5142 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 5160 // TODO(ascull): handle 64-bit case properly (currently naive version) | 5143 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 5161 // This might be handled by a higher level lowering of switches. | 5144 // This might be handled by a higher level lowering of switches. |
| 5162 SizeT NumCases = Inst->getNumCases(); | 5145 SizeT NumCases = Inst->getNumCases(); |
| 5163 if (NumCases >= 2) { | 5146 if (NumCases >= 2) { |
| 5164 Src0Lo = legalizeToReg(Src0Lo); | 5147 Src0Lo = legalizeToReg(Src0Lo); |
| 5165 Src0Hi = legalizeToReg(Src0Hi); | 5148 Src0Hi = legalizeToReg(Src0Hi); |
| 5166 } else { | 5149 } else { |
| 5167 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | 5150 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); |
| 5168 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 5151 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
| 5169 } | 5152 } |
| 5170 for (SizeT I = 0; I < NumCases; ++I) { | 5153 for (SizeT I = 0; I < NumCases; ++I) { |
| 5171 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | 5154 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); |
| 5172 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | 5155 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); |
| 5173 typename Traits::Insts::Label *Label = | 5156 InstX86Label *Label = InstX86Label::create(Func, this); |
| 5174 Traits::Insts::Label::create(Func, this); | |
| 5175 _cmp(Src0Lo, ValueLo); | 5157 _cmp(Src0Lo, ValueLo); |
| 5176 _br(Traits::Cond::Br_ne, Label); | 5158 _br(Traits::Cond::Br_ne, Label); |
| 5177 _cmp(Src0Hi, ValueHi); | 5159 _cmp(Src0Hi, ValueHi); |
| 5178 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | 5160 _br(Traits::Cond::Br_e, Inst->getLabel(I)); |
| 5179 Context.insert(Label); | 5161 Context.insert(Label); |
| 5180 } | 5162 } |
| 5181 _br(Inst->getLabelDefault()); | 5163 _br(Inst->getLabelDefault()); |
| 5182 return; | 5164 return; |
| 5183 } else { | 5165 } else { |
| 5184 // All the values are 32-bit so just check the operand is too and then | 5166 // All the values are 32-bit so just check the operand is too and then |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 5199 constexpr bool DoneCmp = false; | 5181 constexpr bool DoneCmp = false; |
| 5200 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); | 5182 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); |
| 5201 return; | 5183 return; |
| 5202 } | 5184 } |
| 5203 | 5185 |
| 5204 // Going to be using multiple times so get it in a register early | 5186 // Going to be using multiple times so get it in a register early |
| 5205 Variable *Comparison = legalizeToReg(Src0); | 5187 Variable *Comparison = legalizeToReg(Src0); |
| 5206 | 5188 |
| 5207 // A span is over the clusters | 5189 // A span is over the clusters |
| 5208 struct SearchSpan { | 5190 struct SearchSpan { |
| 5209 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) | 5191 SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label) |
| 5210 : Begin(Begin), Size(Size), Label(Label) {} | 5192 : Begin(Begin), Size(Size), Label(Label) {} |
| 5211 | 5193 |
| 5212 SizeT Begin; | 5194 SizeT Begin; |
| 5213 SizeT Size; | 5195 SizeT Size; |
| 5214 typename Traits::Insts::Label *Label; | 5196 InstX86Label *Label; |
| 5215 }; | 5197 }; |
| 5216 // The stack will only grow to the height of the tree so 12 should be plenty | 5198 // The stack will only grow to the height of the tree so 12 should be plenty |
| 5217 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack; | 5199 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack; |
| 5218 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr); | 5200 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr); |
| 5219 bool DoneCmp = false; | 5201 bool DoneCmp = false; |
| 5220 | 5202 |
| 5221 while (!SearchSpanStack.empty()) { | 5203 while (!SearchSpanStack.empty()) { |
| 5222 SearchSpan Span = SearchSpanStack.top(); | 5204 SearchSpan Span = SearchSpanStack.top(); |
| 5223 SearchSpanStack.pop(); | 5205 SearchSpanStack.pop(); |
| 5224 | 5206 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5258 DoneCmp = false; | 5240 DoneCmp = false; |
| 5259 lowerCaseCluster(*CaseB, Comparison, DoneCmp, | 5241 lowerCaseCluster(*CaseB, Comparison, DoneCmp, |
| 5260 SearchSpanStack.empty() ? nullptr : DefaultTarget); | 5242 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 5261 } break; | 5243 } break; |
| 5262 | 5244 |
| 5263 default: | 5245 default: |
| 5264 // Pick the middle item and branch b or ae | 5246 // Pick the middle item and branch b or ae |
| 5265 SizeT PivotIndex = Span.Begin + (Span.Size / 2); | 5247 SizeT PivotIndex = Span.Begin + (Span.Size / 2); |
| 5266 const CaseCluster &Pivot = CaseClusters[PivotIndex]; | 5248 const CaseCluster &Pivot = CaseClusters[PivotIndex]; |
| 5267 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); | 5249 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); |
| 5268 typename Traits::Insts::Label *Label = | 5250 InstX86Label *Label = InstX86Label::create(Func, this); |
| 5269 Traits::Insts::Label::create(Func, this); | |
| 5270 _cmp(Comparison, Value); | 5251 _cmp(Comparison, Value); |
| 5271 // TODO(ascull): does it alway have to be far? | 5252 // TODO(ascull): does it alway have to be far? |
| 5272 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); | 5253 _br(Traits::Cond::Br_b, Label, InstX86Br::Far); |
| 5273 // Lower the left and (pivot+right) sides, falling through to the right | 5254 // Lower the left and (pivot+right) sides, falling through to the right |
| 5274 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); | 5255 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); |
| 5275 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); | 5256 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); |
| 5276 DoneCmp = true; | 5257 DoneCmp = true; |
| 5277 break; | 5258 break; |
| 5278 } | 5259 } |
| 5279 } | 5260 } |
| 5280 | 5261 |
| 5281 _br(DefaultTarget); | 5262 _br(DefaultTarget); |
| 5282 } | 5263 } |
| 5283 | 5264 |
| 5284 template <class Machine> | 5265 template <typename TraitsType> |
| 5285 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 5266 void TargetX86Base<TraitsType>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 5286 Variable *Dest, Operand *Src0, | 5267 Variable *Dest, |
| 5287 Operand *Src1) { | 5268 Operand *Src0, |
| 5269 Operand *Src1) { | |
| 5288 assert(isVectorType(Dest->getType())); | 5270 assert(isVectorType(Dest->getType())); |
| 5289 Type Ty = Dest->getType(); | 5271 Type Ty = Dest->getType(); |
| 5290 Type ElementTy = typeElementType(Ty); | 5272 Type ElementTy = typeElementType(Ty); |
| 5291 SizeT NumElements = typeNumElements(Ty); | 5273 SizeT NumElements = typeNumElements(Ty); |
| 5292 | 5274 |
| 5293 Operand *T = Ctx->getConstantUndef(Ty); | 5275 Operand *T = Ctx->getConstantUndef(Ty); |
| 5294 for (SizeT I = 0; I < NumElements; ++I) { | 5276 for (SizeT I = 0; I < NumElements; ++I) { |
| 5295 Constant *Index = Ctx->getConstantInt32(I); | 5277 Constant *Index = Ctx->getConstantInt32(I); |
| 5296 | 5278 |
| 5297 // Extract the next two inputs. | 5279 // Extract the next two inputs. |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 5316 } | 5298 } |
| 5317 | 5299 |
| 5318 /// The following pattern occurs often in lowered C and C++ code: | 5300 /// The following pattern occurs often in lowered C and C++ code: |
| 5319 /// | 5301 /// |
| 5320 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 5302 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 5321 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 5303 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 5322 /// | 5304 /// |
| 5323 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 5305 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 5324 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the | 5306 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
| 5325 /// sext operation. | 5307 /// sext operation. |
| 5326 template <class Machine> | 5308 template <typename TraitsType> |
| 5327 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 5309 void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction( |
| 5328 Variable *SignExtendedResult) { | 5310 Variable *SignExtendedResult) { |
| 5329 if (auto *NextCast = | 5311 if (auto *NextCast = |
| 5330 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 5312 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 5331 if (NextCast->getCastKind() == InstCast::Sext && | 5313 if (NextCast->getCastKind() == InstCast::Sext && |
| 5332 NextCast->getSrc(0) == SignExtendedResult) { | 5314 NextCast->getSrc(0) == SignExtendedResult) { |
| 5333 NextCast->setDeleted(); | 5315 NextCast->setDeleted(); |
| 5334 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); | 5316 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); |
| 5335 // Skip over the instruction. | 5317 // Skip over the instruction. |
| 5336 Context.advanceNext(); | 5318 Context.advanceNext(); |
| 5337 } | 5319 } |
| 5338 } | 5320 } |
| 5339 } | 5321 } |
| 5340 | 5322 |
| 5341 template <class Machine> | 5323 template <typename TraitsType> |
| 5342 void TargetX86Base<Machine>::lowerUnreachable( | 5324 void TargetX86Base<TraitsType>::lowerUnreachable( |
| 5343 const InstUnreachable * /*Inst*/) { | 5325 const InstUnreachable * /*Inst*/) { |
| 5344 _ud2(); | 5326 _ud2(); |
| 5345 // Add a fake use of esp to make sure esp adjustments after the unreachable | 5327 // Add a fake use of esp to make sure esp adjustments after the unreachable |
| 5346 // do not get dead-code eliminated. | 5328 // do not get dead-code eliminated. |
| 5347 keepEspLiveAtExit(); | 5329 keepEspLiveAtExit(); |
| 5348 } | 5330 } |
| 5349 | 5331 |
| 5350 template <class Machine> | 5332 template <typename TraitsType> |
| 5351 void TargetX86Base<Machine>::lowerRMW( | 5333 void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) { |
| 5352 const typename Traits::Insts::FakeRMW *RMW) { | |
| 5353 // If the beacon variable's live range does not end in this instruction, then | 5334 // If the beacon variable's live range does not end in this instruction, then |
| 5354 // it must end in the modified Store instruction that follows. This means | 5335 // it must end in the modified Store instruction that follows. This means |
| 5355 // that the original Store instruction is still there, either because the | 5336 // that the original Store instruction is still there, either because the |
| 5356 // value being stored is used beyond the Store instruction, or because dead | 5337 // value being stored is used beyond the Store instruction, or because dead |
| 5357 // code elimination did not happen. In either case, we cancel RMW lowering | 5338 // code elimination did not happen. In either case, we cancel RMW lowering |
| 5358 // (and the caller deletes the RMW instruction). | 5339 // (and the caller deletes the RMW instruction). |
| 5359 if (!RMW->isLastUse(RMW->getBeacon())) | 5340 if (!RMW->isLastUse(RMW->getBeacon())) |
| 5360 return; | 5341 return; |
| 5361 Operand *Src = RMW->getData(); | 5342 Operand *Src = RMW->getData(); |
| 5362 Type Ty = Src->getType(); | 5343 Type Ty = Src->getType(); |
| 5363 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | 5344 X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
| 5364 doMockBoundsCheck(Addr); | 5345 doMockBoundsCheck(Addr); |
| 5365 if (!Traits::Is64Bit && Ty == IceType_i64) { | 5346 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 5366 Src = legalizeUndef(Src); | 5347 Src = legalizeUndef(Src); |
| 5367 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | 5348 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); |
| 5368 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | 5349 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); |
| 5369 typename Traits::X86OperandMem *AddrLo = | 5350 X86OperandMem *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr)); |
| 5370 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); | 5351 X86OperandMem *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr)); |
| 5371 typename Traits::X86OperandMem *AddrHi = | |
| 5372 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); | |
| 5373 switch (RMW->getOp()) { | 5352 switch (RMW->getOp()) { |
| 5374 default: | 5353 default: |
| 5375 // TODO(stichnot): Implement other arithmetic operators. | 5354 // TODO(stichnot): Implement other arithmetic operators. |
| 5376 break; | 5355 break; |
| 5377 case InstArithmetic::Add: | 5356 case InstArithmetic::Add: |
| 5378 _add_rmw(AddrLo, SrcLo); | 5357 _add_rmw(AddrLo, SrcLo); |
| 5379 _adc_rmw(AddrHi, SrcHi); | 5358 _adc_rmw(AddrHi, SrcHi); |
| 5380 return; | 5359 return; |
| 5381 case InstArithmetic::Sub: | 5360 case InstArithmetic::Sub: |
| 5382 _sub_rmw(AddrLo, SrcLo); | 5361 _sub_rmw(AddrLo, SrcLo); |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5420 return; | 5399 return; |
| 5421 case InstArithmetic::Xor: | 5400 case InstArithmetic::Xor: |
| 5422 Src = legalize(Src, Legal_Reg | Legal_Imm); | 5401 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 5423 _xor_rmw(Addr, Src); | 5402 _xor_rmw(Addr, Src); |
| 5424 return; | 5403 return; |
| 5425 } | 5404 } |
| 5426 } | 5405 } |
| 5427 llvm::report_fatal_error("Couldn't lower RMW instruction"); | 5406 llvm::report_fatal_error("Couldn't lower RMW instruction"); |
| 5428 } | 5407 } |
| 5429 | 5408 |
| 5430 template <class Machine> | 5409 template <typename TraitsType> |
| 5431 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 5410 void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) { |
| 5432 if (const auto *RMW = | 5411 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) { |
| 5433 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) { | |
| 5434 lowerRMW(RMW); | 5412 lowerRMW(RMW); |
| 5435 } else { | 5413 } else { |
| 5436 TargetLowering::lowerOther(Instr); | 5414 TargetLowering::lowerOther(Instr); |
| 5437 } | 5415 } |
| 5438 } | 5416 } |
| 5439 | 5417 |
| 5440 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve | 5418 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve |
| 5441 /// integrity of liveness analysis. Undef values are also turned into zeroes, | 5419 /// integrity of liveness analysis. Undef values are also turned into zeroes, |
| 5442 /// since loOperand() and hiOperand() don't expect Undef input. | 5420 /// since loOperand() and hiOperand() don't expect Undef input. |
| 5443 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 5421 template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() { |
| 5444 if (Traits::Is64Bit) { | 5422 if (Traits::Is64Bit) { |
| 5445 // On x86-64 we don't need to prelower phis -- the architecture can handle | 5423 // On x86-64 we don't need to prelower phis -- the architecture can handle |
| 5446 // 64-bit integer natively. | 5424 // 64-bit integer natively. |
| 5447 return; | 5425 return; |
| 5448 } | 5426 } |
| 5449 | 5427 |
| 5450 // Pause constant blinding or pooling, blinding or pooling will be done later | 5428 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 5451 // during phi lowering assignments | 5429 // during phi lowering assignments |
| 5452 BoolFlagSaver B(RandomizationPoolingPaused, true); | 5430 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 5453 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 5431 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( |
| 5454 this, Context.getNode(), Func); | 5432 this, Context.getNode(), Func); |
| 5455 } | 5433 } |
| 5456 | 5434 |
| 5457 template <class Machine> | 5435 template <typename TraitsType> |
| 5458 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) { | 5436 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { |
| 5459 uint32_t StackArgumentsSize = 0; | 5437 uint32_t StackArgumentsSize = 0; |
| 5460 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 5438 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
| 5461 const char *HelperName = nullptr; | 5439 const char *HelperName = nullptr; |
| 5462 Variable *Dest = Arith->getDest(); | 5440 Variable *Dest = Arith->getDest(); |
| 5463 Type DestTy = Dest->getType(); | 5441 Type DestTy = Dest->getType(); |
| 5464 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 5442 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 5465 switch (Arith->getOp()) { | 5443 switch (Arith->getOp()) { |
| 5466 default: | 5444 default: |
| 5467 return; | 5445 return; |
| 5468 case InstArithmetic::Udiv: | 5446 case InstArithmetic::Udiv: |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5673 if (!isScalarFloatingType(ReturnType)) | 5651 if (!isScalarFloatingType(ReturnType)) |
| 5674 return; | 5652 return; |
| 5675 StackArgumentsSize = typeWidthInBytes(ReturnType); | 5653 StackArgumentsSize = typeWidthInBytes(ReturnType); |
| 5676 } else { | 5654 } else { |
| 5677 return; | 5655 return; |
| 5678 } | 5656 } |
| 5679 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); | 5657 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); |
| 5680 updateMaxOutArgsSizeBytes(StackArgumentsSize); | 5658 updateMaxOutArgsSizeBytes(StackArgumentsSize); |
| 5681 } | 5659 } |
| 5682 | 5660 |
| 5683 template <class Machine> | 5661 template <typename TraitsType> |
| 5684 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes( | 5662 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes( |
| 5685 const std::vector<Type> &ArgTypes, Type ReturnType) { | 5663 const std::vector<Type> &ArgTypes, Type ReturnType) { |
| 5686 uint32_t OutArgumentsSizeBytes = 0; | 5664 uint32_t OutArgumentsSizeBytes = 0; |
| 5687 uint32_t XmmArgCount = 0; | 5665 uint32_t XmmArgCount = 0; |
| 5688 uint32_t GprArgCount = 0; | 5666 uint32_t GprArgCount = 0; |
| 5689 for (Type Ty : ArgTypes) { | 5667 for (Type Ty : ArgTypes) { |
| 5690 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 5668 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 5691 assert(typeWidthInBytes(Ty) >= 4); | 5669 assert(typeWidthInBytes(Ty) >= 4); |
| 5692 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { | 5670 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { |
| 5693 ++XmmArgCount; | 5671 ++XmmArgCount; |
| 5694 } else if (isScalarIntegerType(Ty) && | 5672 } else if (isScalarIntegerType(Ty) && |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 5708 // The 32 bit ABI requires floating point values to be returned on the x87 FP | 5686 // The 32 bit ABI requires floating point values to be returned on the x87 FP |
| 5709 // stack. Ensure there is enough space for the fstp/movs for floating returns. | 5687 // stack. Ensure there is enough space for the fstp/movs for floating returns. |
| 5710 if (isScalarFloatingType(ReturnType)) { | 5688 if (isScalarFloatingType(ReturnType)) { |
| 5711 OutArgumentsSizeBytes = | 5689 OutArgumentsSizeBytes = |
| 5712 std::max(OutArgumentsSizeBytes, | 5690 std::max(OutArgumentsSizeBytes, |
| 5713 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); | 5691 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); |
| 5714 } | 5692 } |
| 5715 return OutArgumentsSizeBytes; | 5693 return OutArgumentsSizeBytes; |
| 5716 } | 5694 } |
| 5717 | 5695 |
| 5718 template <class Machine> | 5696 template <typename TraitsType> |
| 5719 uint32_t | 5697 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes( |
| 5720 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { | 5698 const InstCall *Instr) { |
| 5721 // Build a vector of the arguments' types. | 5699 // Build a vector of the arguments' types. |
| 5722 std::vector<Type> ArgTypes; | 5700 std::vector<Type> ArgTypes; |
| 5723 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 5701 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 5724 Operand *Arg = Instr->getArg(i); | 5702 Operand *Arg = Instr->getArg(i); |
| 5725 ArgTypes.emplace_back(Arg->getType()); | 5703 ArgTypes.emplace_back(Arg->getType()); |
| 5726 } | 5704 } |
| 5727 // Compute the return type (if any); | 5705 // Compute the return type (if any); |
| 5728 Type ReturnType = IceType_void; | 5706 Type ReturnType = IceType_void; |
| 5729 Variable *Dest = Instr->getDest(); | 5707 Variable *Dest = Instr->getDest(); |
| 5730 if (Dest != nullptr) | 5708 if (Dest != nullptr) |
| 5731 ReturnType = Dest->getType(); | 5709 ReturnType = Dest->getType(); |
| 5732 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); | 5710 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); |
| 5733 } | 5711 } |
| 5734 | 5712 |
| 5735 template <class Machine> | 5713 template <typename TraitsType> |
| 5736 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { | 5714 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, |
| 5715 int32_t RegNum) { | |
| 5737 Variable *Reg = makeReg(Ty, RegNum); | 5716 Variable *Reg = makeReg(Ty, RegNum); |
| 5738 switch (Ty) { | 5717 switch (Ty) { |
| 5739 case IceType_i1: | 5718 case IceType_i1: |
| 5740 case IceType_i8: | 5719 case IceType_i8: |
| 5741 case IceType_i16: | 5720 case IceType_i16: |
| 5742 case IceType_i32: | 5721 case IceType_i32: |
| 5743 case IceType_i64: | 5722 case IceType_i64: |
| 5744 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5723 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
| 5745 _mov(Reg, Ctx->getConstantZero(Ty)); | 5724 _mov(Reg, Ctx->getConstantZero(Ty)); |
| 5746 break; | 5725 break; |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 5759 return Reg; | 5738 return Reg; |
| 5760 } | 5739 } |
| 5761 | 5740 |
| 5762 // There is no support for loading or emitting vector constants, so the vector | 5741 // There is no support for loading or emitting vector constants, so the vector |
| 5763 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are | 5742 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
| 5764 // initialized with register operations. | 5743 // initialized with register operations. |
| 5765 // | 5744 // |
| 5766 // TODO(wala): Add limited support for vector constants so that complex | 5745 // TODO(wala): Add limited support for vector constants so that complex |
| 5767 // initialization in registers is unnecessary. | 5746 // initialization in registers is unnecessary. |
| 5768 | 5747 |
| 5769 template <class Machine> | 5748 template <typename TraitsType> |
| 5770 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5749 Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty, |
| 5750 int32_t RegNum) { | |
| 5771 return makeZeroedRegister(Ty, RegNum); | 5751 return makeZeroedRegister(Ty, RegNum); |
| 5772 } | 5752 } |
| 5773 | 5753 |
| 5774 template <class Machine> | 5754 template <typename TraitsType> |
| 5775 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | 5755 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
| 5776 int32_t RegNum) { | 5756 int32_t RegNum) { |
| 5777 Variable *MinusOnes = makeReg(Ty, RegNum); | 5757 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5778 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5758 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5779 Context.insert<InstFakeDef>(MinusOnes); | 5759 Context.insert<InstFakeDef>(MinusOnes); |
| 5780 _pcmpeq(MinusOnes, MinusOnes); | 5760 _pcmpeq(MinusOnes, MinusOnes); |
| 5781 return MinusOnes; | 5761 return MinusOnes; |
| 5782 } | 5762 } |
| 5783 | 5763 |
| 5784 template <class Machine> | 5764 template <typename TraitsType> |
| 5785 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5765 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| 5786 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5766 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5787 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5767 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5788 _psub(Dest, MinusOne); | 5768 _psub(Dest, MinusOne); |
| 5789 return Dest; | 5769 return Dest; |
| 5790 } | 5770 } |
| 5791 | 5771 |
| 5792 template <class Machine> | 5772 template <typename TraitsType> |
| 5793 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, | 5773 Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty, |
| 5794 int32_t RegNum) { | 5774 int32_t RegNum) { |
| 5795 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 5775 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 5796 Ty == IceType_v16i8); | 5776 Ty == IceType_v16i8); |
| 5797 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 5777 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 5798 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 5778 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 5799 SizeT Shift = | 5779 SizeT Shift = |
| 5800 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 5780 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
| 5801 _psll(Reg, Ctx->getConstantInt8(Shift)); | 5781 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 5802 return Reg; | 5782 return Reg; |
| 5803 } else { | 5783 } else { |
| 5804 // SSE has no left shift operation for vectors of 8 bit integers. | 5784 // SSE has no left shift operation for vectors of 8 bit integers. |
| 5805 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 5785 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 5806 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 5786 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 5807 Variable *Reg = makeReg(Ty, RegNum); | 5787 Variable *Reg = makeReg(Ty, RegNum); |
| 5808 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 5788 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 5809 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 5789 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 5810 return Reg; | 5790 return Reg; |
| 5811 } | 5791 } |
| 5812 } | 5792 } |
| 5813 | 5793 |
| 5814 /// Construct a mask in a register that can be and'ed with a floating-point | 5794 /// Construct a mask in a register that can be and'ed with a floating-point |
| 5815 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 5795 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
| 5816 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of | 5796 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
| 5817 /// ones logically right shifted one bit. | 5797 /// ones logically right shifted one bit. |
| 5818 // TODO(stichnot): Fix the wala | 5798 // TODO(stichnot): Fix the wala |
| 5819 // TODO: above, to represent vector constants in memory. | 5799 // TODO: above, to represent vector constants in memory. |
| 5820 template <class Machine> | 5800 template <typename TraitsType> |
| 5821 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 5801 Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty, |
| 5822 int32_t RegNum) { | 5802 int32_t RegNum) { |
| 5823 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 5803 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 5824 _psrl(Reg, Ctx->getConstantInt8(1)); | 5804 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 5825 return Reg; | 5805 return Reg; |
| 5826 } | 5806 } |
| 5827 | 5807 |
| 5828 template <class Machine> | 5808 template <typename TraitsType> |
| 5829 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5809 typename TargetX86Base<TraitsType>::X86OperandMem * |
| 5830 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 5810 TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 5831 uint32_t Offset) { | 5811 uint32_t Offset) { |
| 5832 // Ensure that Loc is a stack slot. | 5812 // Ensure that Loc is a stack slot. |
| 5833 assert(Slot->mustNotHaveReg()); | 5813 assert(Slot->mustNotHaveReg()); |
| 5834 assert(Slot->getRegNum() == Variable::NoRegister); | 5814 assert(Slot->getRegNum() == Variable::NoRegister); |
| 5835 // Compute the location of Loc in memory. | 5815 // Compute the location of Loc in memory. |
| 5836 // TODO(wala,stichnot): lea should not | 5816 // TODO(wala,stichnot): lea should not |
| 5837 // be required. The address of the stack slot is known at compile time | 5817 // be required. The address of the stack slot is known at compile time |
| 5838 // (although not until after addProlog()). | 5818 // (although not until after addProlog()). |
| 5839 constexpr Type PointerType = IceType_i32; | 5819 constexpr Type PointerType = IceType_i32; |
| 5840 Variable *Loc = makeReg(PointerType); | 5820 Variable *Loc = makeReg(PointerType); |
| 5841 _lea(Loc, Slot); | 5821 _lea(Loc, Slot); |
| 5842 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5822 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5843 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5823 return X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
| 5844 } | 5824 } |
| 5845 | 5825 |
| 5846 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. | 5826 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. |
| 5847 /// Src is assumed to already be legalized. If the source operand is known to | 5827 /// Src is assumed to already be legalized. If the source operand is known to |
| 5848 /// be a memory or immediate operand, a simple mov will suffice. But if the | 5828 /// be a memory or immediate operand, a simple mov will suffice. But if the |
| 5849 /// source operand can be a physical register, then it must first be copied into | 5829 /// source operand can be a physical register, then it must first be copied into |
| 5850 /// a physical register that is truncable to 8-bit, then truncated into a | 5830 /// a physical register that is truncable to 8-bit, then truncated into a |
| 5851 /// physical register that can receive a truncation, and finally copied into the | 5831 /// physical register that can receive a truncation, and finally copied into the |
| 5852 /// result 8-bit register (which in general can be any 8-bit register). For | 5832 /// result 8-bit register (which in general can be any 8-bit register). For |
| 5853 /// example, moving %ebp into %ah may be accomplished as: | 5833 /// example, moving %ebp into %ah may be accomplished as: |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 5864 /// Reg_ah. | 5844 /// Reg_ah. |
| 5865 /// | 5845 /// |
| 5866 /// Note #2. ConstantRelocatable operands are also put through this process | 5846 /// Note #2. ConstantRelocatable operands are also put through this process |
| 5867 /// (not truncated directly) because our ELF emitter does R_386_32 relocations | 5847 /// (not truncated directly) because our ELF emitter does R_386_32 relocations |
| 5868 /// but not R_386_8 relocations. | 5848 /// but not R_386_8 relocations. |
| 5869 /// | 5849 /// |
| 5870 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 | 5850 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 |
| 5871 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper | 5851 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper |
| 5872 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument | 5852 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument |
| 5873 /// to the pinsrb instruction. | 5853 /// to the pinsrb instruction. |
| 5874 template <class Machine> | 5854 template <typename TraitsType> |
| 5875 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) { | 5855 Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, int32_t RegNum) { |
| 5876 Type Ty = Src->getType(); | 5856 Type Ty = Src->getType(); |
| 5877 assert(isScalarIntegerType(Ty)); | 5857 assert(isScalarIntegerType(Ty)); |
| 5878 assert(Ty != IceType_i1); | 5858 assert(Ty != IceType_i1); |
| 5879 Variable *Reg = makeReg(IceType_i8, RegNum); | 5859 Variable *Reg = makeReg(IceType_i8, RegNum); |
| 5880 Reg->setRegClass(RCX86_IsTrunc8Rcvr); | 5860 Reg->setRegClass(RCX86_IsTrunc8Rcvr); |
| 5881 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { | 5861 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { |
| 5882 Variable *SrcTruncable = makeReg(Ty); | 5862 Variable *SrcTruncable = makeReg(Ty); |
| 5883 switch (Ty) { | 5863 switch (Ty) { |
| 5884 case IceType_i64: | 5864 case IceType_i64: |
| 5885 SrcTruncable->setRegClass(RCX86_Is64To8); | 5865 SrcTruncable->setRegClass(RCX86_Is64To8); |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 5899 _mov(SrcTruncable, Src); | 5879 _mov(SrcTruncable, Src); |
| 5900 _mov(SrcRcvr, SrcTruncable); | 5880 _mov(SrcRcvr, SrcTruncable); |
| 5901 Src = SrcRcvr; | 5881 Src = SrcRcvr; |
| 5902 } | 5882 } |
| 5903 _mov(Reg, Src); | 5883 _mov(Reg, Src); |
| 5904 return Reg; | 5884 return Reg; |
| 5905 } | 5885 } |
| 5906 | 5886 |
| 5907 /// Helper for legalize() to emit the right code to lower an operand to a | 5887 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5908 /// register of the appropriate type. | 5888 /// register of the appropriate type. |
| 5909 template <class Machine> | 5889 template <typename TraitsType> |
| 5910 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5890 Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, int32_t RegNum) { |
| 5911 Type Ty = Src->getType(); | 5891 Type Ty = Src->getType(); |
| 5912 Variable *Reg = makeReg(Ty, RegNum); | 5892 Variable *Reg = makeReg(Ty, RegNum); |
| 5913 if (isVectorType(Ty)) { | 5893 if (isVectorType(Ty)) { |
| 5914 _movp(Reg, Src); | 5894 _movp(Reg, Src); |
| 5915 } else { | 5895 } else { |
| 5916 _mov(Reg, Src); | 5896 _mov(Reg, Src); |
| 5917 } | 5897 } |
| 5918 return Reg; | 5898 return Reg; |
| 5919 } | 5899 } |
| 5920 | 5900 |
| 5921 template <class Machine> | 5901 template <typename TraitsType> |
| 5922 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, | 5902 Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed, |
| 5923 int32_t RegNum) { | 5903 int32_t RegNum) { |
| 5924 Type Ty = From->getType(); | 5904 Type Ty = From->getType(); |
| 5925 // Assert that a physical register is allowed. To date, all calls to | 5905 // Assert that a physical register is allowed. To date, all calls to |
| 5926 // legalize() allow a physical register. If a physical register needs to be | 5906 // legalize() allow a physical register. If a physical register needs to be |
| 5927 // explicitly disallowed, then new code will need to be written to force a | 5907 // explicitly disallowed, then new code will need to be written to force a |
| 5928 // spill. | 5908 // spill. |
| 5929 assert(Allowed & Legal_Reg); | 5909 assert(Allowed & Legal_Reg); |
| 5930 // If we're asking for a specific physical register, make sure we're not | 5910 // If we're asking for a specific physical register, make sure we're not |
| 5931 // allowing any other operand kinds. (This could be future work, e.g. allow | 5911 // allowing any other operand kinds. (This could be future work, e.g. allow |
| 5932 // the shl shift amount to be either an immediate or in ecx.) | 5912 // the shl shift amount to be either an immediate or in ecx.) |
| 5933 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); | 5913 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); |
| 5934 | 5914 |
| 5935 // Substitute with an available infinite-weight variable if possible. Only do | 5915 // Substitute with an available infinite-weight variable if possible. Only do |
| 5936 // this when we are not asking for a specific register, and when the | 5916 // this when we are not asking for a specific register, and when the |
| 5937 // substitution is not locked to a specific register, and when the types | 5917 // substitution is not locked to a specific register, and when the types |
| 5938 // match, in order to capture the vast majority of opportunities and avoid | 5918 // match, in order to capture the vast majority of opportunities and avoid |
| 5939 // corner cases in the lowering. | 5919 // corner cases in the lowering. |
| 5940 if (RegNum == Variable::NoRegister) { | 5920 if (RegNum == Variable::NoRegister) { |
| 5941 if (Variable *Subst = getContext().availabilityGet(From)) { | 5921 if (Variable *Subst = getContext().availabilityGet(From)) { |
| 5942 // At this point we know there is a potential substitution available. | 5922 // At this point we know there is a potential substitution available. |
| 5943 if (Subst->mustHaveReg() && !Subst->hasReg()) { | 5923 if (Subst->mustHaveReg() && !Subst->hasReg()) { |
| 5944 // At this point we know the substitution will have a register. | 5924 // At this point we know the substitution will have a register. |
| 5945 if (From->getType() == Subst->getType()) { | 5925 if (From->getType() == Subst->getType()) { |
| 5946 // At this point we know the substitution's register is compatible. | 5926 // At this point we know the substitution's register is compatible. |
| 5947 return Subst; | 5927 return Subst; |
| 5948 } | 5928 } |
| 5949 } | 5929 } |
| 5950 } | 5930 } |
| 5951 } | 5931 } |
| 5952 | 5932 |
| 5953 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5933 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) { |
| 5954 // Before doing anything with a Mem operand, we need to ensure that the | 5934 // Before doing anything with a Mem operand, we need to ensure that the |
| 5955 // Base and Index components are in physical registers. | 5935 // Base and Index components are in physical registers. |
| 5956 Variable *Base = Mem->getBase(); | 5936 Variable *Base = Mem->getBase(); |
| 5957 Variable *Index = Mem->getIndex(); | 5937 Variable *Index = Mem->getIndex(); |
| 5958 Variable *RegBase = nullptr; | 5938 Variable *RegBase = nullptr; |
| 5959 Variable *RegIndex = nullptr; | 5939 Variable *RegIndex = nullptr; |
| 5960 if (Base) { | 5940 if (Base) { |
| 5961 RegBase = llvm::cast<Variable>( | 5941 RegBase = llvm::cast<Variable>( |
| 5962 legalize(Base, Legal_Reg | Legal_Rematerializable)); | 5942 legalize(Base, Legal_Reg | Legal_Rematerializable)); |
| 5963 } | 5943 } |
| 5964 if (Index) { | 5944 if (Index) { |
| 5965 RegIndex = llvm::cast<Variable>( | 5945 RegIndex = llvm::cast<Variable>( |
| 5966 legalize(Index, Legal_Reg | Legal_Rematerializable)); | 5946 legalize(Index, Legal_Reg | Legal_Rematerializable)); |
| 5967 } | 5947 } |
| 5968 if (Base != RegBase || Index != RegIndex) { | 5948 if (Base != RegBase || Index != RegIndex) { |
| 5969 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), | 5949 Mem = X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex, |
| 5970 RegIndex, Mem->getShift(), | 5950 Mem->getShift(), Mem->getSegmentRegister()); |
| 5971 Mem->getSegmentRegister()); | |
| 5972 } | 5951 } |
| 5973 | 5952 |
| 5974 // For all Memory Operands, we do randomization/pooling here | 5953 // For all Memory Operands, we do randomization/pooling here |
| 5975 From = randomizeOrPoolImmediate(Mem); | 5954 From = randomizeOrPoolImmediate(Mem); |
| 5976 | 5955 |
| 5977 if (!(Allowed & Legal_Mem)) { | 5956 if (!(Allowed & Legal_Mem)) { |
| 5978 From = copyToReg(From, RegNum); | 5957 From = copyToReg(From, RegNum); |
| 5979 } | 5958 } |
| 5980 return From; | 5959 return From; |
| 5981 } | 5960 } |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6018 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { | 5997 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { |
| 6019 if (Utils::isPositiveZero(ConstDouble->getValue())) | 5998 if (Utils::isPositiveZero(ConstDouble->getValue())) |
| 6020 return makeZeroedRegister(Ty, RegNum); | 5999 return makeZeroedRegister(Ty, RegNum); |
| 6021 } | 6000 } |
| 6022 Variable *Base = nullptr; | 6001 Variable *Base = nullptr; |
| 6023 std::string Buffer; | 6002 std::string Buffer; |
| 6024 llvm::raw_string_ostream StrBuf(Buffer); | 6003 llvm::raw_string_ostream StrBuf(Buffer); |
| 6025 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 6004 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
| 6026 llvm::cast<Constant>(From)->setShouldBePooled(true); | 6005 llvm::cast<Constant>(From)->setShouldBePooled(true); |
| 6027 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 6006 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
| 6028 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 6007 From = X86OperandMem::create(Func, Ty, Base, Offset); |
| 6029 } | 6008 } |
| 6030 bool NeedsReg = false; | 6009 bool NeedsReg = false; |
| 6031 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) | 6010 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) |
| 6032 // Immediate specifically not allowed | 6011 // Immediate specifically not allowed |
| 6033 NeedsReg = true; | 6012 NeedsReg = true; |
| 6034 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 6013 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
| 6035 // On x86, FP constants are lowered to mem operands. | 6014 // On x86, FP constants are lowered to mem operands. |
| 6036 NeedsReg = true; | 6015 NeedsReg = true; |
| 6037 if (NeedsReg) { | 6016 if (NeedsReg) { |
| 6038 From = copyToReg(From, RegNum); | 6017 From = copyToReg(From, RegNum); |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 6050 // - Mem is not allowed and Var isn't guaranteed a physical register, or | 6029 // - Mem is not allowed and Var isn't guaranteed a physical register, or |
| 6051 // - RegNum is required and Var->getRegNum() doesn't match, or | 6030 // - RegNum is required and Var->getRegNum() doesn't match, or |
| 6052 // - Var is a rematerializable variable and rematerializable pass-through is | 6031 // - Var is a rematerializable variable and rematerializable pass-through is |
| 6053 // not allowed (in which case we need an lea instruction). | 6032 // not allowed (in which case we need an lea instruction). |
| 6054 if (MustRematerialize) { | 6033 if (MustRematerialize) { |
| 6055 assert(Ty == IceType_i32); | 6034 assert(Ty == IceType_i32); |
| 6056 Variable *NewVar = makeReg(Ty, RegNum); | 6035 Variable *NewVar = makeReg(Ty, RegNum); |
| 6057 // Since Var is rematerializable, the offset will be added when the lea is | 6036 // Since Var is rematerializable, the offset will be added when the lea is |
| 6058 // emitted. | 6037 // emitted. |
| 6059 constexpr Constant *NoOffset = nullptr; | 6038 constexpr Constant *NoOffset = nullptr; |
| 6060 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset); | 6039 auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset); |
| 6061 _lea(NewVar, Mem); | 6040 _lea(NewVar, Mem); |
| 6062 From = NewVar; | 6041 From = NewVar; |
| 6063 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 6042 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 6064 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || | 6043 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || |
| 6065 MustRematerialize) { | 6044 MustRematerialize) { |
| 6066 From = copyToReg(From, RegNum); | 6045 From = copyToReg(From, RegNum); |
| 6067 } | 6046 } |
| 6068 return From; | 6047 return From; |
| 6069 } | 6048 } |
| 6070 llvm_unreachable("Unhandled operand kind in legalize()"); | 6049 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 6071 return From; | 6050 return From; |
| 6072 } | 6051 } |
| 6073 | 6052 |
| 6074 /// Provide a trivial wrapper to legalize() for this common usage. | 6053 /// Provide a trivial wrapper to legalize() for this common usage. |
| 6075 template <class Machine> | 6054 template <typename TraitsType> |
| 6076 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { | 6055 Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From, |
| 6056 int32_t RegNum) { | |
| 6077 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 6057 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 6078 } | 6058 } |
| 6079 | 6059 |
| 6080 /// Legalize undef values to concrete values. | 6060 /// Legalize undef values to concrete values. |
| 6081 template <class Machine> | 6061 template <typename TraitsType> |
| 6082 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { | 6062 Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From, |
| 6063 int32_t RegNum) { | |
| 6083 Type Ty = From->getType(); | 6064 Type Ty = From->getType(); |
| 6084 if (llvm::isa<ConstantUndef>(From)) { | 6065 if (llvm::isa<ConstantUndef>(From)) { |
| 6085 // Lower undefs to zero. Another option is to lower undefs to an | 6066 // Lower undefs to zero. Another option is to lower undefs to an |
| 6086 // uninitialized register; however, using an uninitialized register results | 6067 // uninitialized register; however, using an uninitialized register results |
| 6087 // in less predictable code. | 6068 // in less predictable code. |
| 6088 // | 6069 // |
| 6089 // If in the future the implementation is changed to lower undef values to | 6070 // If in the future the implementation is changed to lower undef values to |
| 6090 // uninitialized registers, a FakeDef will be needed: | 6071 // uninitialized registers, a FakeDef will be needed: |
| 6091 // Context.insert<InstFakeDef>(Reg); | 6072 // Context.insert<InstFakeDef>(Reg); |
| 6092 // This is in order to ensure that the live range of Reg is not | 6073 // This is in order to ensure that the live range of Reg is not |
| 6093 // overestimated. If the constant being lowered is a 64 bit value, then | 6074 // overestimated. If the constant being lowered is a 64 bit value, then |
| 6094 // the result should be split and the lo and hi components will need to go | 6075 // the result should be split and the lo and hi components will need to go |
| 6095 // in uninitialized registers. | 6076 // in uninitialized registers. |
| 6096 if (isVectorType(Ty)) | 6077 if (isVectorType(Ty)) |
| 6097 return makeVectorOfZeros(Ty, RegNum); | 6078 return makeVectorOfZeros(Ty, RegNum); |
| 6098 return Ctx->getConstantZero(Ty); | 6079 return Ctx->getConstantZero(Ty); |
| 6099 } | 6080 } |
| 6100 return From; | 6081 return From; |
| 6101 } | 6082 } |
| 6102 | 6083 |
| 6103 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical | 6084 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
| 6104 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be | 6085 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
| 6105 /// copied into a physical register. (Actually, either Src0 or Src1 can be | 6086 /// copied into a physical register. (Actually, either Src0 or Src1 can be |
| 6106 /// chosen for the physical register, but unfortunately we have to commit to one | 6087 /// chosen for the physical register, but unfortunately we have to commit to one |
| 6107 /// or the other before register allocation.) | 6088 /// or the other before register allocation.) |
| 6108 template <class Machine> | 6089 template <typename TraitsType> |
| 6109 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 6090 Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0, |
| 6110 Operand *Src1) { | 6091 Operand *Src1) { |
| 6111 bool IsSrc1ImmOrReg = false; | 6092 bool IsSrc1ImmOrReg = false; |
| 6112 if (llvm::isa<Constant>(Src1)) { | 6093 if (llvm::isa<Constant>(Src1)) { |
| 6113 IsSrc1ImmOrReg = true; | 6094 IsSrc1ImmOrReg = true; |
| 6114 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { | 6095 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 6115 if (Var->hasReg()) | 6096 if (Var->hasReg()) |
| 6116 IsSrc1ImmOrReg = true; | 6097 IsSrc1ImmOrReg = true; |
| 6117 } | 6098 } |
| 6118 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 6099 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| 6119 } | 6100 } |
| 6120 | 6101 |
| 6121 template <class Machine> | 6102 template <typename TraitsType> |
| 6122 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 6103 typename TargetX86Base<TraitsType>::X86OperandMem * |
| 6123 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, | 6104 TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty, |
| 6124 bool DoLegalize) { | 6105 bool DoLegalize) { |
| 6125 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); | 6106 auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd); |
| 6126 // It may be the case that address mode optimization already creates an | 6107 // It may be the case that address mode optimization already creates an |
| 6127 // Traits::X86OperandMem, so in that case it wouldn't need another level of | 6108 // X86OperandMem, so in that case it wouldn't need another level of |
| 6128 // transformation. | 6109 // transformation. |
| 6129 if (!Mem) { | 6110 if (!Mem) { |
| 6130 auto *Base = llvm::dyn_cast<Variable>(Opnd); | 6111 auto *Base = llvm::dyn_cast<Variable>(Opnd); |
| 6131 auto *Offset = llvm::dyn_cast<Constant>(Opnd); | 6112 auto *Offset = llvm::dyn_cast<Constant>(Opnd); |
| 6132 assert(Base || Offset); | 6113 assert(Base || Offset); |
| 6133 if (Offset) { | 6114 if (Offset) { |
| 6134 // During memory operand building, we do not blind or pool the constant | 6115 // During memory operand building, we do not blind or pool the constant |
| 6135 // offset, we will work on the whole memory operand later as one entity | 6116 // offset, we will work on the whole memory operand later as one entity |
| 6136 // later, this save one instruction. By turning blinding and pooling off, | 6117 // later, this save one instruction. By turning blinding and pooling off, |
| 6137 // we guarantee legalize(Offset) will return a Constant*. | 6118 // we guarantee legalize(Offset) will return a Constant*. |
| 6138 { | 6119 { |
| 6139 BoolFlagSaver B(RandomizationPoolingPaused, true); | 6120 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 6140 | 6121 |
| 6141 Offset = llvm::cast<Constant>(legalize(Offset)); | 6122 Offset = llvm::cast<Constant>(legalize(Offset)); |
| 6142 } | 6123 } |
| 6143 | 6124 |
| 6144 assert(llvm::isa<ConstantInteger32>(Offset) || | 6125 assert(llvm::isa<ConstantInteger32>(Offset) || |
| 6145 llvm::isa<ConstantRelocatable>(Offset)); | 6126 llvm::isa<ConstantRelocatable>(Offset)); |
| 6146 } | 6127 } |
| 6147 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 6128 Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
| 6148 } | 6129 } |
| 6149 // Do legalization, which contains randomization/pooling or do | 6130 // Do legalization, which contains randomization/pooling or do |
| 6150 // randomization/pooling. | 6131 // randomization/pooling. |
| 6151 return llvm::cast<typename Traits::X86OperandMem>( | 6132 return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem) |
| 6152 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 6133 : randomizeOrPoolImmediate(Mem)); |
| 6153 } | 6134 } |
| 6154 | 6135 |
| 6155 template <class Machine> | 6136 template <typename TraitsType> |
| 6156 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | 6137 Variable *TargetX86Base<TraitsType>::makeReg(Type Type, int32_t RegNum) { |
| 6157 // There aren't any 64-bit integer registers for x86-32. | 6138 // There aren't any 64-bit integer registers for x86-32. |
| 6158 assert(Traits::Is64Bit || Type != IceType_i64); | 6139 assert(Traits::Is64Bit || Type != IceType_i64); |
| 6159 Variable *Reg = Func->makeVariable(Type); | 6140 Variable *Reg = Func->makeVariable(Type); |
| 6160 if (RegNum == Variable::NoRegister) | 6141 if (RegNum == Variable::NoRegister) |
| 6161 Reg->setMustHaveReg(); | 6142 Reg->setMustHaveReg(); |
| 6162 else | 6143 else |
| 6163 Reg->setRegNum(RegNum); | 6144 Reg->setRegNum(RegNum); |
| 6164 return Reg; | 6145 return Reg; |
| 6165 } | 6146 } |
| 6166 | 6147 |
| 6167 template <class Machine> | 6148 template <typename TraitsType> |
| 6168 const Type TargetX86Base<Machine>::TypeForSize[] = { | 6149 const Type TargetX86Base<TraitsType>::TypeForSize[] = { |
| 6169 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; | 6150 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; |
| 6170 template <class Machine> | 6151 template <typename TraitsType> |
| 6171 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, | 6152 Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size, |
| 6172 uint32_t MaxSize) { | 6153 uint32_t MaxSize) { |
| 6173 assert(Size != 0); | 6154 assert(Size != 0); |
| 6174 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6155 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
| 6175 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6156 uint32_t MaxIndex = MaxSize == NoSizeLimit |
| 6176 ? llvm::array_lengthof(TypeForSize) - 1 | 6157 ? llvm::array_lengthof(TypeForSize) - 1 |
| 6177 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6158 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
| 6178 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6159 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
| 6179 } | 6160 } |
| 6180 | 6161 |
| 6181 template <class Machine> | 6162 template <typename TraitsType> |
| 6182 Type TargetX86Base<Machine>::firstTypeThatFitsSize(uint32_t Size, | 6163 Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size, |
| 6183 uint32_t MaxSize) { | 6164 uint32_t MaxSize) { |
| 6184 assert(Size != 0); | 6165 assert(Size != 0); |
| 6185 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6166 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
| 6186 if (!llvm::isPowerOf2_32(Size)) | 6167 if (!llvm::isPowerOf2_32(Size)) |
| 6187 ++TyIndex; | 6168 ++TyIndex; |
| 6188 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6169 uint32_t MaxIndex = MaxSize == NoSizeLimit |
| 6189 ? llvm::array_lengthof(TypeForSize) - 1 | 6170 ? llvm::array_lengthof(TypeForSize) - 1 |
| 6190 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6171 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
| 6191 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6172 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
| 6192 } | 6173 } |
| 6193 | 6174 |
| 6194 template <class Machine> void TargetX86Base<Machine>::postLower() { | 6175 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { |
| 6195 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 6176 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 6196 return; | 6177 return; |
| 6197 markRedefinitions(); | 6178 markRedefinitions(); |
| 6198 Context.availabilityUpdate(); | 6179 Context.availabilityUpdate(); |
| 6199 } | 6180 } |
| 6200 | 6181 |
| 6201 template <class Machine> | 6182 template <typename TraitsType> |
| 6202 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | 6183 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( |
| 6203 llvm::SmallVectorImpl<int32_t> &Permutation, | 6184 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 6204 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { | 6185 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { |
| 6205 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation, | 6186 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation, |
| 6206 ExcludeRegisters, Salt); | 6187 ExcludeRegisters, Salt); |
| 6207 } | 6188 } |
| 6208 | 6189 |
| 6209 template <class Machine> | 6190 template <typename TraitsType> |
| 6210 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | 6191 void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const { |
| 6211 if (!BuildDefs::dump()) | 6192 if (!BuildDefs::dump()) |
| 6212 return; | 6193 return; |
| 6213 Ostream &Str = Ctx->getStrEmit(); | 6194 Ostream &Str = Ctx->getStrEmit(); |
| 6214 Str << getConstantPrefix() << C->getValue(); | 6195 Str << getConstantPrefix() << C->getValue(); |
| 6215 } | 6196 } |
| 6216 | 6197 |
| 6217 template <class Machine> | 6198 template <typename TraitsType> |
| 6218 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const { | 6199 void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const { |
| 6219 if (!Traits::Is64Bit) { | 6200 if (!Traits::Is64Bit) { |
| 6220 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 6201 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); |
| 6221 } else { | 6202 } else { |
| 6222 if (!BuildDefs::dump()) | 6203 if (!BuildDefs::dump()) |
| 6223 return; | 6204 return; |
| 6224 Ostream &Str = Ctx->getStrEmit(); | 6205 Ostream &Str = Ctx->getStrEmit(); |
| 6225 Str << getConstantPrefix() << C->getValue(); | 6206 Str << getConstantPrefix() << C->getValue(); |
| 6226 } | 6207 } |
| 6227 } | 6208 } |
| 6228 | 6209 |
| 6229 template <class Machine> | 6210 template <typename TraitsType> |
| 6230 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | 6211 void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const { |
| 6231 if (!BuildDefs::dump()) | 6212 if (!BuildDefs::dump()) |
| 6232 return; | 6213 return; |
| 6233 Ostream &Str = Ctx->getStrEmit(); | 6214 Ostream &Str = Ctx->getStrEmit(); |
| 6234 C->emitPoolLabel(Str, Ctx); | 6215 C->emitPoolLabel(Str, Ctx); |
| 6235 } | 6216 } |
| 6236 | 6217 |
| 6237 template <class Machine> | 6218 template <typename TraitsType> |
| 6238 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const { | 6219 void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const { |
| 6239 if (!BuildDefs::dump()) | 6220 if (!BuildDefs::dump()) |
| 6240 return; | 6221 return; |
| 6241 Ostream &Str = Ctx->getStrEmit(); | 6222 Ostream &Str = Ctx->getStrEmit(); |
| 6242 C->emitPoolLabel(Str, Ctx); | 6223 C->emitPoolLabel(Str, Ctx); |
| 6243 } | 6224 } |
| 6244 | 6225 |
| 6245 template <class Machine> | 6226 template <typename TraitsType> |
| 6246 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | 6227 void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const { |
| 6247 llvm::report_fatal_error("undef value encountered by emitter."); | 6228 llvm::report_fatal_error("undef value encountered by emitter."); |
| 6248 } | 6229 } |
| 6249 | 6230 |
| 6250 /// Randomize or pool an Immediate. | 6231 /// Randomize or pool an Immediate. |
| 6251 template <class Machine> | 6232 template <typename TraitsType> |
| 6252 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 6233 Operand * |
| 6253 int32_t RegNum) { | 6234 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate, |
| 6235 int32_t RegNum) { | |
| 6254 assert(llvm::isa<ConstantInteger32>(Immediate) || | 6236 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 6255 llvm::isa<ConstantRelocatable>(Immediate)); | 6237 llvm::isa<ConstantRelocatable>(Immediate)); |
| 6256 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 6238 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 6257 RandomizationPoolingPaused == true) { | 6239 RandomizationPoolingPaused == true) { |
| 6258 // Immediates randomization/pooling off or paused | 6240 // Immediates randomization/pooling off or paused |
| 6259 return Immediate; | 6241 return Immediate; |
| 6260 } | 6242 } |
| 6261 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { | 6243 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { |
| 6262 Ctx->statsUpdateRPImms(); | 6244 Ctx->statsUpdateRPImms(); |
| 6263 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 6245 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| 6264 RPI_Randomize) { | 6246 RPI_Randomize) { |
| 6265 // blind the constant | 6247 // blind the constant |
| 6266 // FROM: | 6248 // FROM: |
| 6267 // imm | 6249 // imm |
| 6268 // TO: | 6250 // TO: |
| 6269 // insert: mov imm+cookie, Reg | 6251 // insert: mov imm+cookie, Reg |
| 6270 // insert: lea -cookie[Reg], Reg | 6252 // insert: lea -cookie[Reg], Reg |
| 6271 // => Reg | 6253 // => Reg |
| 6272 // If we have already assigned a phy register, we must come from | 6254 // If we have already assigned a phy register, we must come from |
| 6273 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 6255 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
| 6274 // assigned register as this assignment is that start of its use-def | 6256 // assigned register as this assignment is that start of its use-def |
| 6275 // chain. So we add RegNum argument here. Note we use 'lea' instruction | 6257 // chain. So we add RegNum argument here. Note we use 'lea' instruction |
| 6276 // instead of 'xor' to avoid affecting the flags. | 6258 // instead of 'xor' to avoid affecting the flags. |
| 6277 Variable *Reg = makeReg(IceType_i32, RegNum); | 6259 Variable *Reg = makeReg(IceType_i32, RegNum); |
| 6278 auto *Integer = llvm::cast<ConstantInteger32>(Immediate); | 6260 auto *Integer = llvm::cast<ConstantInteger32>(Immediate); |
| 6279 uint32_t Value = Integer->getValue(); | 6261 uint32_t Value = Integer->getValue(); |
| 6280 uint32_t Cookie = Func->getConstantBlindingCookie(); | 6262 uint32_t Cookie = Func->getConstantBlindingCookie(); |
| 6281 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); | 6263 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); |
| 6282 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); | 6264 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); |
| 6283 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, | 6265 _lea(Reg, |
| 6284 nullptr, 0)); | 6266 X86OperandMem::create(Func, IceType_i32, Reg, Offset, nullptr, 0)); |
| 6285 if (Immediate->getType() != IceType_i32) { | 6267 if (Immediate->getType() != IceType_i32) { |
| 6286 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); | 6268 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); |
| 6287 _mov(TruncReg, Reg); | 6269 _mov(TruncReg, Reg); |
| 6288 return TruncReg; | 6270 return TruncReg; |
| 6289 } | 6271 } |
| 6290 return Reg; | 6272 return Reg; |
| 6291 } | 6273 } |
| 6292 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 6274 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
| 6293 // pool the constant | 6275 // pool the constant |
| 6294 // FROM: | 6276 // FROM: |
| 6295 // imm | 6277 // imm |
| 6296 // TO: | 6278 // TO: |
| 6297 // insert: mov $label, Reg | 6279 // insert: mov $label, Reg |
| 6298 // => Reg | 6280 // => Reg |
| 6299 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 6281 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
| 6300 Immediate->setShouldBePooled(true); | 6282 Immediate->setShouldBePooled(true); |
| 6301 // if we have already assigned a phy register, we must come from | 6283 // if we have already assigned a phy register, we must come from |
| 6302 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 6284 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
| 6303 // assigned register as this assignment is that start of its use-def | 6285 // assigned register as this assignment is that start of its use-def |
| 6304 // chain. So we add RegNum argument here. | 6286 // chain. So we add RegNum argument here. |
| 6305 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 6287 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
| 6306 IceString Label; | 6288 IceString Label; |
| 6307 llvm::raw_string_ostream Label_stream(Label); | 6289 llvm::raw_string_ostream Label_stream(Label); |
| 6308 Immediate->emitPoolLabel(Label_stream, Ctx); | 6290 Immediate->emitPoolLabel(Label_stream, Ctx); |
| 6309 constexpr RelocOffsetT Offset = 0; | 6291 constexpr RelocOffsetT Offset = 0; |
| 6310 constexpr bool SuppressMangling = true; | 6292 constexpr bool SuppressMangling = true; |
| 6311 Constant *Symbol = | 6293 Constant *Symbol = |
| 6312 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 6294 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
| 6313 typename Traits::X86OperandMem *MemOperand = | 6295 X86OperandMem *MemOperand = |
| 6314 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, | 6296 X86OperandMem::create(Func, Immediate->getType(), nullptr, Symbol); |
| 6315 Symbol); | |
| 6316 _mov(Reg, MemOperand); | 6297 _mov(Reg, MemOperand); |
| 6317 return Reg; | 6298 return Reg; |
| 6318 } | 6299 } |
| 6319 assert("Unsupported -randomize-pool-immediates option" && false); | 6300 assert("Unsupported -randomize-pool-immediates option" && false); |
| 6320 } | 6301 } |
| 6321 // the constant Immediate is not eligible for blinding/pooling | 6302 // the constant Immediate is not eligible for blinding/pooling |
| 6322 return Immediate; | 6303 return Immediate; |
| 6323 } | 6304 } |
| 6324 | 6305 |
| 6325 template <class Machine> | 6306 template <typename TraitsType> |
| 6326 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 6307 typename TargetX86Base<TraitsType>::X86OperandMem * |
| 6327 TargetX86Base<Machine>::randomizeOrPoolImmediate( | 6308 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand, |
| 6328 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { | 6309 int32_t RegNum) { |
| 6329 assert(MemOperand); | 6310 assert(MemOperand); |
| 6330 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 6311 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 6331 RandomizationPoolingPaused == true) { | 6312 RandomizationPoolingPaused == true) { |
| 6332 // immediates randomization/pooling is turned off | 6313 // immediates randomization/pooling is turned off |
| 6333 return MemOperand; | 6314 return MemOperand; |
| 6334 } | 6315 } |
| 6335 | 6316 |
| 6336 // If this memory operand is already a randomized one, we do not randomize it | 6317 // If this memory operand is already a randomized one, we do not randomize it |
| 6337 // again. | 6318 // again. |
| 6338 if (MemOperand->getRandomized()) | 6319 if (MemOperand->getRandomized()) |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 6352 // => -cookie[RegTemp, index, shift] | 6333 // => -cookie[RegTemp, index, shift] |
| 6353 uint32_t Value = | 6334 uint32_t Value = |
| 6354 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) | 6335 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) |
| 6355 ->getValue(); | 6336 ->getValue(); |
| 6356 uint32_t Cookie = Func->getConstantBlindingCookie(); | 6337 uint32_t Cookie = Func->getConstantBlindingCookie(); |
| 6357 Constant *Mask1 = Ctx->getConstantInt( | 6338 Constant *Mask1 = Ctx->getConstantInt( |
| 6358 MemOperand->getOffset()->getType(), Cookie + Value); | 6339 MemOperand->getOffset()->getType(), Cookie + Value); |
| 6359 Constant *Mask2 = | 6340 Constant *Mask2 = |
| 6360 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 6341 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
| 6361 | 6342 |
| 6362 typename Traits::X86OperandMem *TempMemOperand = | 6343 X86OperandMem *TempMemOperand = X86OperandMem::create( |
| 6363 Traits::X86OperandMem::create(Func, MemOperand->getType(), | 6344 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); |
| 6364 MemOperand->getBase(), Mask1); | |
| 6365 // If we have already assigned a physical register, we must come from | 6345 // If we have already assigned a physical register, we must come from |
| 6366 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 6346 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
| 6367 // the assigned register as this assignment is that start of its | 6347 // the assigned register as this assignment is that start of its |
| 6368 // use-def chain. So we add RegNum argument here. | 6348 // use-def chain. So we add RegNum argument here. |
| 6369 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 6349 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
| 6370 _lea(RegTemp, TempMemOperand); | 6350 _lea(RegTemp, TempMemOperand); |
| 6371 | 6351 |
| 6372 typename Traits::X86OperandMem *NewMemOperand = | 6352 X86OperandMem *NewMemOperand = X86OperandMem::create( |
| 6373 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, | 6353 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(), |
| 6374 Mask2, MemOperand->getIndex(), | 6354 MemOperand->getShift(), MemOperand->getSegmentRegister()); |
| 6375 MemOperand->getShift(), | |
| 6376 MemOperand->getSegmentRegister()); | |
| 6377 | 6355 |
| 6378 // Label this memory operand as randomized, so we won't randomize it | 6356 // Label this memory operand as randomized, so we won't randomize it |
| 6379 // again in case we call legalize() multiple times on this memory | 6357 // again in case we call legalize() multiple times on this memory |
| 6380 // operand. | 6358 // operand. |
| 6381 NewMemOperand->setRandomized(true); | 6359 NewMemOperand->setRandomized(true); |
| 6382 return NewMemOperand; | 6360 return NewMemOperand; |
| 6383 } | 6361 } |
| 6384 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 6362 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
| 6385 // pool the constant offset | 6363 // pool the constant offset |
| 6386 // FROM: | 6364 // FROM: |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 6400 return MemOperand; | 6378 return MemOperand; |
| 6401 Variable *RegTemp = makeReg(IceType_i32); | 6379 Variable *RegTemp = makeReg(IceType_i32); |
| 6402 IceString Label; | 6380 IceString Label; |
| 6403 llvm::raw_string_ostream Label_stream(Label); | 6381 llvm::raw_string_ostream Label_stream(Label); |
| 6404 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 6382 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); |
| 6405 MemOperand->getOffset()->setShouldBePooled(true); | 6383 MemOperand->getOffset()->setShouldBePooled(true); |
| 6406 constexpr RelocOffsetT SymOffset = 0; | 6384 constexpr RelocOffsetT SymOffset = 0; |
| 6407 constexpr bool SuppressMangling = true; | 6385 constexpr bool SuppressMangling = true; |
| 6408 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 6386 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
| 6409 SuppressMangling); | 6387 SuppressMangling); |
| 6410 typename Traits::X86OperandMem *SymbolOperand = | 6388 X86OperandMem *SymbolOperand = X86OperandMem::create( |
| 6411 Traits::X86OperandMem::create( | 6389 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); |
| 6412 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | |
| 6413 _mov(RegTemp, SymbolOperand); | 6390 _mov(RegTemp, SymbolOperand); |
| 6414 // If we have a base variable here, we should add the lea instruction | 6391 // If we have a base variable here, we should add the lea instruction |
| 6415 // to add the value of the base variable to RegTemp. If there is no | 6392 // to add the value of the base variable to RegTemp. If there is no |
| 6416 // base variable, we won't need this lea instruction. | 6393 // base variable, we won't need this lea instruction. |
| 6417 if (MemOperand->getBase()) { | 6394 if (MemOperand->getBase()) { |
| 6418 typename Traits::X86OperandMem *CalculateOperand = | 6395 X86OperandMem *CalculateOperand = X86OperandMem::create( |
| 6419 Traits::X86OperandMem::create( | 6396 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, |
| 6420 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, | 6397 RegTemp, 0, MemOperand->getSegmentRegister()); |
| 6421 RegTemp, 0, MemOperand->getSegmentRegister()); | |
| 6422 _lea(RegTemp, CalculateOperand); | 6398 _lea(RegTemp, CalculateOperand); |
| 6423 } | 6399 } |
| 6424 typename Traits::X86OperandMem *NewMemOperand = | 6400 X86OperandMem *NewMemOperand = X86OperandMem::create( |
| 6425 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, | 6401 Func, MemOperand->getType(), RegTemp, nullptr, |
| 6426 nullptr, MemOperand->getIndex(), | 6402 MemOperand->getIndex(), MemOperand->getShift(), |
| 6427 MemOperand->getShift(), | 6403 MemOperand->getSegmentRegister()); |
| 6428 MemOperand->getSegmentRegister()); | |
| 6429 return NewMemOperand; | 6404 return NewMemOperand; |
| 6430 } | 6405 } |
| 6431 assert("Unsupported -randomize-pool-immediates option" && false); | 6406 assert("Unsupported -randomize-pool-immediates option" && false); |
| 6432 } | 6407 } |
| 6433 } | 6408 } |
| 6434 // the offset is not eligible for blinding or pooling, return the original | 6409 // the offset is not eligible for blinding or pooling, return the original |
| 6435 // mem operand | 6410 // mem operand |
| 6436 return MemOperand; | 6411 return MemOperand; |
| 6437 } | 6412 } |
| 6438 | 6413 |
| 6439 } // end of namespace X86Internal | 6414 } // end of namespace X86NAMESPACE |
| 6440 } // end of namespace Ice | 6415 } // end of namespace Ice |
| 6441 | 6416 |
| 6442 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6417 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |