OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// \brief Implements the TargetLoweringX86Base class, which consists almost | 11 /// \brief Implements the TargetLoweringX86Base class, which consists almost |
12 /// entirely of the lowering sequence for each high-level instruction. | 12 /// entirely of the lowering sequence for each high-level instruction. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 | 15 |
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
18 | 18 |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
21 #include "IceClFlags.h" | 21 #include "IceClFlags.h" |
22 #include "IceDefs.h" | 22 #include "IceDefs.h" |
23 #include "IceELFObjectWriter.h" | 23 #include "IceELFObjectWriter.h" |
24 #include "IceGlobalInits.h" | 24 #include "IceGlobalInits.h" |
25 #include "IceInstVarIter.h" | 25 #include "IceInstVarIter.h" |
26 #include "IceLiveness.h" | 26 #include "IceLiveness.h" |
27 #include "IceOperand.h" | 27 #include "IceOperand.h" |
28 #include "IcePhiLoweringImpl.h" | 28 #include "IcePhiLoweringImpl.h" |
29 #include "IceUtils.h" | 29 #include "IceUtils.h" |
30 #include "IceInstX86Base.h" | |
Jim Stichnoth
2016/01/03 18:20:02
alphabetize includes
| |
30 #include "llvm/Support/MathExtras.h" | 31 #include "llvm/Support/MathExtras.h" |
31 | 32 |
32 #include <stack> | 33 #include <stack> |
33 | 34 |
34 namespace Ice { | 35 namespace Ice { |
35 namespace X86Internal { | 36 namespace X86NAMESPACE { |
36 | 37 |
37 /// A helper class to ease the settings of RandomizationPoolingPause to disable | 38 /// A helper class to ease the settings of RandomizationPoolingPause to disable |
38 /// constant blinding or pooling for some translation phases. | 39 /// constant blinding or pooling for some translation phases. |
39 class BoolFlagSaver { | 40 class BoolFlagSaver { |
40 BoolFlagSaver() = delete; | 41 BoolFlagSaver() = delete; |
41 BoolFlagSaver(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver(const BoolFlagSaver &) = delete; |
42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
43 | 44 |
44 public: | 45 public: |
45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
46 ~BoolFlagSaver() { Flag = OldValue; } | 47 ~BoolFlagSaver() { Flag = OldValue; } |
47 | 48 |
48 private: | 49 private: |
49 const bool OldValue; | 50 const bool OldValue; |
50 bool &Flag; | 51 bool &Flag; |
51 }; | 52 }; |
52 | 53 |
53 template <class MachineTraits> class BoolFoldingEntry { | 54 template <typename Traits> class BoolFoldingEntry { |
54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete; |
55 | 56 |
56 public: | 57 public: |
57 BoolFoldingEntry() = default; | 58 BoolFoldingEntry() = default; |
58 explicit BoolFoldingEntry(Inst *I); | 59 explicit BoolFoldingEntry(Inst *I); |
59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
60 /// Instr is the instruction producing the i1-type variable of interest. | 61 /// Instr is the instruction producing the i1-type variable of interest. |
61 Inst *Instr = nullptr; | 62 Inst *Instr = nullptr; |
62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
63 bool IsComplex = false; | 64 bool IsComplex = false; |
64 /// IsLiveOut is initialized conservatively to true, and is set to false when | 65 /// IsLiveOut is initialized conservatively to true, and is set to false when |
65 /// we encounter an instruction that ends Var's live range. We disable the | 66 /// we encounter an instruction that ends Var's live range. We disable the |
66 /// folding optimization when Var is live beyond this basic block. Note that | 67 /// folding optimization when Var is live beyond this basic block. Note that |
67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
68 /// always be true and the folding optimization will never be performed. | 69 /// always be true and the folding optimization will never be performed. |
69 bool IsLiveOut = true; | 70 bool IsLiveOut = true; |
70 // NumUses counts the number of times Var is used as a source operand in the | 71 // NumUses counts the number of times Var is used as a source operand in the |
71 // basic block. If IsComplex is true and there is more than one use of Var, | 72 // basic block. If IsComplex is true and there is more than one use of Var, |
72 // then the folding optimization is disabled for Var. | 73 // then the folding optimization is disabled for Var. |
73 uint32_t NumUses = 0; | 74 uint32_t NumUses = 0; |
74 }; | 75 }; |
75 | 76 |
76 template <class MachineTraits> class BoolFolding { | 77 template <typename Traits> class BoolFolding { |
77 public: | 78 public: |
78 enum BoolFoldingProducerKind { | 79 enum BoolFoldingProducerKind { |
79 PK_None, | 80 PK_None, |
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | 81 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
81 PK_Icmp32, | 82 PK_Icmp32, |
82 PK_Icmp64, | 83 PK_Icmp64, |
83 PK_Fcmp, | 84 PK_Fcmp, |
84 PK_Trunc, | 85 PK_Trunc, |
85 PK_Arith // A flag-setting arithmetic instruction. | 86 PK_Arith // A flag-setting arithmetic instruction. |
86 }; | 87 }; |
(...skipping 19 matching lines...) Expand all Loading... | |
106 void dump(const Cfg *Func) const; | 107 void dump(const Cfg *Func) const; |
107 | 108 |
108 private: | 109 private: |
109 /// Returns true if Producers contains a valid entry for the given VarNum. | 110 /// Returns true if Producers contains a valid entry for the given VarNum. |
110 bool containsValid(SizeT VarNum) const { | 111 bool containsValid(SizeT VarNum) const { |
111 auto Element = Producers.find(VarNum); | 112 auto Element = Producers.find(VarNum); |
112 return Element != Producers.end() && Element->second.Instr != nullptr; | 113 return Element != Producers.end() && Element->second.Instr != nullptr; |
113 } | 114 } |
114 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | 115 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
115 /// Producers maps Variable::Number to a BoolFoldingEntry. | 116 /// Producers maps Variable::Number to a BoolFoldingEntry. |
116 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | 117 std::unordered_map<SizeT, BoolFoldingEntry<Traits>> Producers; |
117 }; | 118 }; |
118 | 119 |
119 template <class MachineTraits> | 120 template <typename Traits> |
120 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 121 BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I) |
121 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 122 : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {} |
122 | 123 |
123 template <class MachineTraits> | 124 template <typename Traits> |
124 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 125 typename BoolFolding<Traits>::BoolFoldingProducerKind |
125 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 126 BoolFolding<Traits>::getProducerKind(const Inst *Instr) { |
126 if (llvm::isa<InstIcmp>(Instr)) { | 127 if (llvm::isa<InstIcmp>(Instr)) { |
127 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) | 128 if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) |
128 return PK_Icmp32; | 129 return PK_Icmp32; |
129 return PK_Icmp64; | 130 return PK_Icmp64; |
130 } | 131 } |
131 if (llvm::isa<InstFcmp>(Instr)) | 132 if (llvm::isa<InstFcmp>(Instr)) |
132 return PK_Fcmp; | 133 return PK_Fcmp; |
133 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 134 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
134 if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { | 135 if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { |
135 switch (Arith->getOp()) { | 136 switch (Arith->getOp()) { |
136 default: | 137 default: |
137 return PK_None; | 138 return PK_None; |
138 case InstArithmetic::And: | 139 case InstArithmetic::And: |
139 case InstArithmetic::Or: | 140 case InstArithmetic::Or: |
140 return PK_Arith; | 141 return PK_Arith; |
141 } | 142 } |
142 } | 143 } |
143 } | 144 } |
144 return PK_None; // TODO(stichnot): remove this | 145 return PK_None; // TODO(stichnot): remove this |
145 | 146 |
146 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 147 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
147 switch (Cast->getCastKind()) { | 148 switch (Cast->getCastKind()) { |
148 default: | 149 default: |
149 return PK_None; | 150 return PK_None; |
150 case InstCast::Trunc: | 151 case InstCast::Trunc: |
151 return PK_Trunc; | 152 return PK_Trunc; |
152 } | 153 } |
153 } | 154 } |
154 return PK_None; | 155 return PK_None; |
155 } | 156 } |
156 | 157 |
157 template <class MachineTraits> | 158 template <typename Traits> |
158 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind | 159 typename BoolFolding<Traits>::BoolFoldingConsumerKind |
159 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { | 160 BoolFolding<Traits>::getConsumerKind(const Inst *Instr) { |
160 if (llvm::isa<InstBr>(Instr)) | 161 if (llvm::isa<InstBr>(Instr)) |
161 return CK_Br; | 162 return CK_Br; |
162 if (llvm::isa<InstSelect>(Instr)) | 163 if (llvm::isa<InstSelect>(Instr)) |
163 return CK_Select; | 164 return CK_Select; |
164 return CK_None; // TODO(stichnot): remove this | 165 return CK_None; // TODO(stichnot): remove this |
165 | 166 |
166 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 167 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
167 switch (Cast->getCastKind()) { | 168 switch (Cast->getCastKind()) { |
168 default: | 169 default: |
169 return CK_None; | 170 return CK_None; |
170 case InstCast::Sext: | 171 case InstCast::Sext: |
171 return CK_Sext; | 172 return CK_Sext; |
172 case InstCast::Zext: | 173 case InstCast::Zext: |
173 return CK_Zext; | 174 return CK_Zext; |
174 } | 175 } |
175 } | 176 } |
176 return CK_None; | 177 return CK_None; |
177 } | 178 } |
178 | 179 |
179 /// Returns true if the producing instruction has a "complex" lowering sequence. | 180 /// Returns true if the producing instruction has a "complex" lowering sequence. |
180 /// This generally means that its lowering sequence requires more than one | 181 /// This generally means that its lowering sequence requires more than one |
181 /// conditional branch, namely 64-bit integer compares and some floating-point | 182 /// conditional branch, namely 64-bit integer compares and some floating-point |
182 /// compares. When this is true, and there is more than one consumer, we prefer | 183 /// compares. When this is true, and there is more than one consumer, we prefer |
183 /// to disable the folding optimization because it minimizes branches. | 184 /// to disable the folding optimization because it minimizes branches. |
184 template <class MachineTraits> | 185 template <typename Traits> |
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 186 bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) { |
186 switch (getProducerKind(Instr)) { | 187 switch (getProducerKind(Instr)) { |
187 default: | 188 default: |
188 return false; | 189 return false; |
189 case PK_Icmp64: | 190 case PK_Icmp64: |
190 return !MachineTraits::Is64Bit; | 191 return !Traits::Is64Bit; |
191 case PK_Fcmp: | 192 case PK_Fcmp: |
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 193 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != |
193 .C2 != MachineTraits::Cond::Br_None; | 194 Traits::Cond::Br_None; |
194 } | 195 } |
195 } | 196 } |
196 | 197 |
197 template <class MachineTraits> | 198 template <typename Traits> |
198 bool BoolFolding<MachineTraits>::isValidFolding( | 199 bool BoolFolding<Traits>::isValidFolding( |
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, | 200 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind, |
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { | 201 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) { |
201 switch (ProducerKind) { | 202 switch (ProducerKind) { |
202 default: | 203 default: |
203 return false; | 204 return false; |
204 case PK_Icmp32: | 205 case PK_Icmp32: |
205 case PK_Icmp64: | 206 case PK_Icmp64: |
206 case PK_Fcmp: | 207 case PK_Fcmp: |
207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); | 208 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); |
208 case PK_Arith: | 209 case PK_Arith: |
209 return ConsumerKind == CK_Br; | 210 return ConsumerKind == CK_Br; |
210 } | 211 } |
211 } | 212 } |
212 | 213 |
213 template <class MachineTraits> | 214 template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) { |
214 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | |
215 Producers.clear(); | 215 Producers.clear(); |
216 for (Inst &Instr : Node->getInsts()) { | 216 for (Inst &Instr : Node->getInsts()) { |
217 // Check whether Instr is a valid producer. | 217 // Check whether Instr is a valid producer. |
218 Variable *Var = Instr.getDest(); | 218 Variable *Var = Instr.getDest(); |
219 if (!Instr.isDeleted() // only consider non-deleted instructions | 219 if (!Instr.isDeleted() // only consider non-deleted instructions |
220 && Var // only instructions with an actual dest var | 220 && Var // only instructions with an actual dest var |
221 && Var->getType() == IceType_i1 // only bool-type dest vars | 221 && Var->getType() == IceType_i1 // only bool-type dest vars |
222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); | 223 Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr); |
224 } | 224 } |
225 // Check each src variable against the map. | 225 // Check each src variable against the map. |
226 FOREACH_VAR_IN_INST(Var, Instr) { | 226 FOREACH_VAR_IN_INST(Var, Instr) { |
227 SizeT VarNum = Var->getIndex(); | 227 SizeT VarNum = Var->getIndex(); |
228 if (!containsValid(VarNum)) | 228 if (!containsValid(VarNum)) |
229 continue; | 229 continue; |
230 // All valid consumers use Var as the first source operand | 230 // All valid consumers use Var as the first source operand |
231 if (IndexOfVarOperandInInst(Var) != 0) { | 231 if (IndexOfVarOperandInInst(Var) != 0) { |
232 setInvalid(VarNum); | 232 setInvalid(VarNum); |
233 continue; | 233 continue; |
234 } | 234 } |
235 // Consumer instructions must be white-listed | 235 // Consumer instructions must be white-listed |
236 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind | 236 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind = |
237 ConsumerKind = getConsumerKind(&Instr); | 237 getConsumerKind(&Instr); |
238 if (ConsumerKind == CK_None) { | 238 if (ConsumerKind == CK_None) { |
239 setInvalid(VarNum); | 239 setInvalid(VarNum); |
240 continue; | 240 continue; |
241 } | 241 } |
242 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 242 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind = |
243 ProducerKind = getProducerKind(Producers[VarNum].Instr); | 243 getProducerKind(Producers[VarNum].Instr); |
244 if (!isValidFolding(ProducerKind, ConsumerKind)) { | 244 if (!isValidFolding(ProducerKind, ConsumerKind)) { |
245 setInvalid(VarNum); | 245 setInvalid(VarNum); |
246 continue; | 246 continue; |
247 } | 247 } |
248 // Avoid creating multiple copies of complex producer instructions. | 248 // Avoid creating multiple copies of complex producer instructions. |
249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { | 249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { |
250 setInvalid(VarNum); | 250 setInvalid(VarNum); |
251 continue; | 251 continue; |
252 } | 252 } |
253 ++Producers[VarNum].NumUses; | 253 ++Producers[VarNum].NumUses; |
(...skipping 12 matching lines...) Expand all Loading... | |
266 continue; | 266 continue; |
267 } | 267 } |
268 // Mark as "dead" rather than outright deleting. This is so that other | 268 // Mark as "dead" rather than outright deleting. This is so that other |
269 // peephole style optimizations during or before lowering have access to | 269 // peephole style optimizations during or before lowering have access to |
270 // this instruction in undeleted form. See for example | 270 // this instruction in undeleted form. See for example |
271 // tryOptimizedCmpxchgCmpBr(). | 271 // tryOptimizedCmpxchgCmpBr(). |
272 I.second.Instr->setDead(); | 272 I.second.Instr->setDead(); |
273 } | 273 } |
274 } | 274 } |
275 | 275 |
276 template <class MachineTraits> | 276 template <typename Traits> |
277 const Inst * | 277 const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const { |
278 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { | |
279 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | 278 auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
280 if (Var == nullptr) | 279 if (Var == nullptr) |
281 return nullptr; | 280 return nullptr; |
282 SizeT VarNum = Var->getIndex(); | 281 SizeT VarNum = Var->getIndex(); |
283 auto Element = Producers.find(VarNum); | 282 auto Element = Producers.find(VarNum); |
284 if (Element == Producers.end()) | 283 if (Element == Producers.end()) |
285 return nullptr; | 284 return nullptr; |
286 return Element->second.Instr; | 285 return Element->second.Instr; |
287 } | 286 } |
288 | 287 |
289 template <class MachineTraits> | 288 template <typename Traits> |
290 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const { | 289 void BoolFolding<Traits>::dump(const Cfg *Func) const { |
291 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) | 290 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) |
292 return; | 291 return; |
293 OstreamLocker L(Func->getContext()); | 292 OstreamLocker L(Func->getContext()); |
294 Ostream &Str = Func->getContext()->getStrDump(); | 293 Ostream &Str = Func->getContext()->getStrDump(); |
295 for (auto &I : Producers) { | 294 for (auto &I : Producers) { |
296 if (I.second.Instr == nullptr) | 295 if (I.second.Instr == nullptr) |
297 continue; | 296 continue; |
298 Str << "Found foldable producer:\n "; | 297 Str << "Found foldable producer:\n "; |
299 I.second.Instr->dump(Func); | 298 I.second.Instr->dump(Func); |
300 Str << "\n"; | 299 Str << "\n"; |
301 } | 300 } |
302 } | 301 } |
303 | 302 |
304 template <class Machine> | 303 template <typename TraitsType> |
305 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) { | 304 void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) { |
306 FoldingInfo.init(Node); | 305 FoldingInfo.init(Node); |
307 FoldingInfo.dump(Func); | 306 FoldingInfo.dump(Func); |
308 } | 307 } |
309 | 308 |
310 template <class Machine> | 309 template <typename TraitsType> |
311 TargetX86Base<Machine>::TargetX86Base(Cfg *Func) | 310 TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func) |
312 : TargetLowering(Func) { | 311 : TargetLowering(Func) { |
313 static_assert( | 312 static_assert( |
314 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == | 313 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == |
315 (TargetInstructionSet::X86InstructionSet_End - | 314 (TargetInstructionSet::X86InstructionSet_End - |
316 TargetInstructionSet::X86InstructionSet_Begin), | 315 TargetInstructionSet::X86InstructionSet_Begin), |
317 "Traits::InstructionSet range different from TargetInstructionSet"); | 316 "Traits::InstructionSet range different from TargetInstructionSet"); |
318 if (Func->getContext()->getFlags().getTargetInstructionSet() != | 317 if (Func->getContext()->getFlags().getTargetInstructionSet() != |
319 TargetInstructionSet::BaseInstructionSet) { | 318 TargetInstructionSet::BaseInstructionSet) { |
320 InstructionSet = static_cast<typename Traits::InstructionSet>( | 319 InstructionSet = static_cast<InstructionSetEnum>( |
321 (Func->getContext()->getFlags().getTargetInstructionSet() - | 320 (Func->getContext()->getFlags().getTargetInstructionSet() - |
322 TargetInstructionSet::X86InstructionSet_Begin) + | 321 TargetInstructionSet::X86InstructionSet_Begin) + |
323 Traits::InstructionSet::Begin); | 322 Traits::InstructionSet::Begin); |
324 } | 323 } |
325 } | 324 } |
326 | 325 |
327 template <class Machine> void TargetX86Base<Machine>::staticInit() { | 326 template <typename TraitsType> void TargetX86Base<TraitsType>::staticInit() { |
328 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); | 327 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); |
329 } | 328 } |
330 | 329 |
331 template <class Machine> void TargetX86Base<Machine>::translateO2() { | 330 template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() { |
332 TimerMarker T(TimerStack::TT_O2, Func); | 331 TimerMarker T(TimerStack::TT_O2, Func); |
333 | 332 |
334 genTargetHelperCalls(); | 333 genTargetHelperCalls(); |
335 Func->dump("After target helper call insertion"); | 334 Func->dump("After target helper call insertion"); |
336 | 335 |
337 // Merge Alloca instructions, and lay out the stack. | 336 // Merge Alloca instructions, and lay out the stack. |
338 static constexpr bool SortAndCombineAllocas = true; | 337 static constexpr bool SortAndCombineAllocas = true; |
339 Func->processAllocas(SortAndCombineAllocas); | 338 Func->processAllocas(SortAndCombineAllocas); |
340 Func->dump("After Alloca processing"); | 339 Func->dump("After Alloca processing"); |
341 | 340 |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
447 Func->dump("After branch optimization"); | 446 Func->dump("After branch optimization"); |
448 | 447 |
449 // Nop insertion if -nop-insertion is enabled. | 448 // Nop insertion if -nop-insertion is enabled. |
450 Func->doNopInsertion(); | 449 Func->doNopInsertion(); |
451 | 450 |
452 // Mark nodes that require sandbox alignment | 451 // Mark nodes that require sandbox alignment |
453 if (Ctx->getFlags().getUseSandboxing()) | 452 if (Ctx->getFlags().getUseSandboxing()) |
454 Func->markNodesForSandboxing(); | 453 Func->markNodesForSandboxing(); |
455 } | 454 } |
456 | 455 |
457 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 456 template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() { |
458 TimerMarker T(TimerStack::TT_Om1, Func); | 457 TimerMarker T(TimerStack::TT_Om1, Func); |
459 | 458 |
460 genTargetHelperCalls(); | 459 genTargetHelperCalls(); |
461 | 460 |
462 // Do not merge Alloca instructions, and lay out the stack. | 461 // Do not merge Alloca instructions, and lay out the stack. |
463 static constexpr bool SortAndCombineAllocas = false; | 462 static constexpr bool SortAndCombineAllocas = false; |
464 Func->processAllocas(SortAndCombineAllocas); | 463 Func->processAllocas(SortAndCombineAllocas); |
465 Func->dump("After Alloca processing"); | 464 Func->dump("After Alloca processing"); |
466 | 465 |
467 Func->placePhiLoads(); | 466 Func->placePhiLoads(); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
524 case InstArithmetic::Xor: | 523 case InstArithmetic::Xor: |
525 return true; | 524 return true; |
526 case InstArithmetic::Shl: | 525 case InstArithmetic::Shl: |
527 case InstArithmetic::Lshr: | 526 case InstArithmetic::Lshr: |
528 case InstArithmetic::Ashr: | 527 case InstArithmetic::Ashr: |
529 return false; // TODO(stichnot): implement | 528 return false; // TODO(stichnot): implement |
530 return !isI64; | 529 return !isI64; |
531 } | 530 } |
532 } | 531 } |
533 | 532 |
534 template <class Machine> | 533 template <typename TraitsType> |
535 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { | 534 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { |
536 if (A == B) | 535 if (A == B) |
537 return true; | 536 return true; |
538 if (auto *MemA = llvm::dyn_cast< | 537 if (auto *MemA = |
539 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) { | 538 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>( |
540 if (auto *MemB = llvm::dyn_cast< | 539 A)) { |
541 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) { | 540 if (auto *MemB = |
541 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>( | |
542 B)) { | |
542 return MemA->getBase() == MemB->getBase() && | 543 return MemA->getBase() == MemB->getBase() && |
543 MemA->getOffset() == MemB->getOffset() && | 544 MemA->getOffset() == MemB->getOffset() && |
544 MemA->getIndex() == MemB->getIndex() && | 545 MemA->getIndex() == MemB->getIndex() && |
545 MemA->getShift() == MemB->getShift() && | 546 MemA->getShift() == MemB->getShift() && |
546 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | 547 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
547 } | 548 } |
548 } | 549 } |
549 return false; | 550 return false; |
550 } | 551 } |
551 | 552 |
552 template <class Machine> void TargetX86Base<Machine>::findRMW() { | 553 template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() { |
553 Func->dump("Before RMW"); | 554 Func->dump("Before RMW"); |
554 if (Func->isVerbose(IceV_RMW)) | 555 if (Func->isVerbose(IceV_RMW)) |
555 Func->getContext()->lockStr(); | 556 Func->getContext()->lockStr(); |
556 for (CfgNode *Node : Func->getNodes()) { | 557 for (CfgNode *Node : Func->getNodes()) { |
557 // Walk through the instructions, considering each sequence of 3 | 558 // Walk through the instructions, considering each sequence of 3 |
558 // instructions, and look for the particular RMW pattern. Note that this | 559 // instructions, and look for the particular RMW pattern. Note that this |
559 // search can be "broken" (false negatives) if there are intervening | 560 // search can be "broken" (false negatives) if there are intervening |
560 // deleted instructions, or intervening instructions that could be safely | 561 // deleted instructions, or intervening instructions that could be safely |
561 // moved out of the way to reveal an RMW pattern. | 562 // moved out of the way to reveal an RMW pattern. |
562 auto E = Node->getInsts().end(); | 563 auto E = Node->getInsts().end(); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
596 // x's live range, and therefore the RMW instruction will be retained and | 597 // x's live range, and therefore the RMW instruction will be retained and |
597 // later lowered. On the other hand, if the RMW instruction does not end | 598 // later lowered. On the other hand, if the RMW instruction does not end |
598 // x's live range, then the Store instruction must still be present, and | 599 // x's live range, then the Store instruction must still be present, and |
599 // therefore the RMW instruction is ignored during lowering because it is | 600 // therefore the RMW instruction is ignored during lowering because it is |
600 // redundant with the Store instruction. | 601 // redundant with the Store instruction. |
601 // | 602 // |
602 // Note that if "a" has further uses, the RMW transformation may still | 603 // Note that if "a" has further uses, the RMW transformation may still |
603 // trigger, resulting in two loads and one store, which is worse than the | 604 // trigger, resulting in two loads and one store, which is worse than the |
604 // original one load and one store. However, this is probably rare, and | 605 // original one load and one store. However, this is probably rare, and |
605 // caching probably keeps it just as fast. | 606 // caching probably keeps it just as fast. |
606 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), | 607 if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(), |
607 Store->getAddr())) | 608 Store->getAddr())) |
608 continue; | 609 continue; |
609 Operand *ArithSrcFromLoad = Arith->getSrc(0); | 610 Operand *ArithSrcFromLoad = Arith->getSrc(0); |
610 Operand *ArithSrcOther = Arith->getSrc(1); | 611 Operand *ArithSrcOther = Arith->getSrc(1); |
611 if (ArithSrcFromLoad != Load->getDest()) { | 612 if (ArithSrcFromLoad != Load->getDest()) { |
612 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) | 613 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) |
613 continue; | 614 continue; |
614 std::swap(ArithSrcFromLoad, ArithSrcOther); | 615 std::swap(ArithSrcFromLoad, ArithSrcOther); |
615 } | 616 } |
616 if (Arith->getDest() != Store->getData()) | 617 if (Arith->getDest() != Store->getData()) |
617 continue; | 618 continue; |
618 if (!canRMW(Arith)) | 619 if (!canRMW(Arith)) |
619 continue; | 620 continue; |
620 if (Func->isVerbose(IceV_RMW)) { | 621 if (Func->isVerbose(IceV_RMW)) { |
621 Ostream &Str = Func->getContext()->getStrDump(); | 622 Ostream &Str = Func->getContext()->getStrDump(); |
622 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | 623 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; |
623 Load->dump(Func); | 624 Load->dump(Func); |
624 Str << "\n "; | 625 Str << "\n "; |
625 Arith->dump(Func); | 626 Arith->dump(Func); |
626 Str << "\n "; | 627 Str << "\n "; |
627 Store->dump(Func); | 628 Store->dump(Func); |
628 Str << "\n"; | 629 Str << "\n"; |
629 } | 630 } |
630 Variable *Beacon = Func->makeVariable(IceType_i32); | 631 Variable *Beacon = Func->makeVariable(IceType_i32); |
631 Beacon->setMustNotHaveReg(); | 632 Beacon->setMustNotHaveReg(); |
632 Store->setRmwBeacon(Beacon); | 633 Store->setRmwBeacon(Beacon); |
633 auto *BeaconDef = InstFakeDef::create(Func, Beacon); | 634 auto *BeaconDef = InstFakeDef::create(Func, Beacon); |
634 Node->getInsts().insert(I3, BeaconDef); | 635 Node->getInsts().insert(I3, BeaconDef); |
635 auto *RMW = Traits::Insts::FakeRMW::create( | 636 auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(), |
636 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | 637 Beacon, Arith->getOp()); |
637 Node->getInsts().insert(I3, RMW); | 638 Node->getInsts().insert(I3, RMW); |
638 } | 639 } |
639 } | 640 } |
640 if (Func->isVerbose(IceV_RMW)) | 641 if (Func->isVerbose(IceV_RMW)) |
641 Func->getContext()->unlockStr(); | 642 Func->getContext()->unlockStr(); |
642 } | 643 } |
643 | 644 |
644 // Converts a ConstantInteger32 operand into its constant value, or | 645 // Converts a ConstantInteger32 operand into its constant value, or |
645 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 646 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
646 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 647 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
(...skipping 12 matching lines...) Expand all Loading... | |
659 Src0 = LoadSrc; | 660 Src0 = LoadSrc; |
660 return true; | 661 return true; |
661 } | 662 } |
662 if (Src0 != LoadDest && Src1 == LoadDest) { | 663 if (Src0 != LoadDest && Src1 == LoadDest) { |
663 Src1 = LoadSrc; | 664 Src1 = LoadSrc; |
664 return true; | 665 return true; |
665 } | 666 } |
666 return false; | 667 return false; |
667 } | 668 } |
668 | 669 |
669 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { | 670 template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() { |
670 for (CfgNode *Node : Func->getNodes()) { | 671 for (CfgNode *Node : Func->getNodes()) { |
671 Context.init(Node); | 672 Context.init(Node); |
672 while (!Context.atEnd()) { | 673 while (!Context.atEnd()) { |
673 Variable *LoadDest = nullptr; | 674 Variable *LoadDest = nullptr; |
674 Operand *LoadSrc = nullptr; | 675 Operand *LoadSrc = nullptr; |
675 Inst *CurInst = Context.getCur(); | 676 Inst *CurInst = Context.getCur(); |
676 Inst *Next = Context.getNextInst(); | 677 Inst *Next = Context.getNextInst(); |
677 // Determine whether the current instruction is a Load instruction or | 678 // Determine whether the current instruction is a Load instruction or |
678 // equivalent. | 679 // equivalent. |
679 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 680 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
748 NewInst->spliceLivenessInfo(Next, CurInst); | 749 NewInst->spliceLivenessInfo(Next, CurInst); |
749 } | 750 } |
750 } | 751 } |
751 Context.advanceCur(); | 752 Context.advanceCur(); |
752 Context.advanceNext(); | 753 Context.advanceNext(); |
753 } | 754 } |
754 } | 755 } |
755 Func->dump("After load optimization"); | 756 Func->dump("After load optimization"); |
756 } | 757 } |
757 | 758 |
758 template <class Machine> | 759 template <typename TraitsType> |
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 760 bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 761 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) { |
761 return Br->optimizeBranch(NextNode); | 762 return Br->optimizeBranch(NextNode); |
762 } | 763 } |
763 return false; | 764 return false; |
764 } | 765 } |
765 | 766 |
766 template <class Machine> | 767 template <typename TraitsType> |
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 768 Variable *TargetX86Base<TraitsType>::getPhysicalRegister(SizeT RegNum, |
769 Type Ty) { | |
768 if (Ty == IceType_void) | 770 if (Ty == IceType_void) |
769 Ty = IceType_i32; | 771 Ty = IceType_i32; |
770 if (PhysicalRegisters[Ty].empty()) | 772 if (PhysicalRegisters[Ty].empty()) |
771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 773 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
772 assert(RegNum < PhysicalRegisters[Ty].size()); | 774 assert(RegNum < PhysicalRegisters[Ty].size()); |
773 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 775 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
774 if (Reg == nullptr) { | 776 if (Reg == nullptr) { |
775 Reg = Func->makeVariable(Ty); | 777 Reg = Func->makeVariable(Ty); |
776 Reg->setRegNum(RegNum); | 778 Reg->setRegNum(RegNum); |
777 PhysicalRegisters[Ty][RegNum] = Reg; | 779 PhysicalRegisters[Ty][RegNum] = Reg; |
778 // Specially mark a named physical register as an "argument" so that it is | 780 // Specially mark a named physical register as an "argument" so that it is |
779 // considered live upon function entry. Otherwise it's possible to get | 781 // considered live upon function entry. Otherwise it's possible to get |
780 // liveness validation errors for saving callee-save registers. | 782 // liveness validation errors for saving callee-save registers. |
781 Func->addImplicitArg(Reg); | 783 Func->addImplicitArg(Reg); |
782 // Don't bother tracking the live range of a named physical register. | 784 // Don't bother tracking the live range of a named physical register. |
783 Reg->setIgnoreLiveness(); | 785 Reg->setIgnoreLiveness(); |
784 } | 786 } |
785 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum)); | 787 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum)); |
786 return Reg; | 788 return Reg; |
787 } | 789 } |
788 | 790 |
789 template <class Machine> | 791 template <typename TraitsType> |
790 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 792 IceString TargetX86Base<TraitsType>::getRegName(SizeT RegNum, Type Ty) const { |
791 return Traits::getRegName(Traits::getGprForType(Ty, RegNum)); | 793 return Traits::getRegName(Traits::getGprForType(Ty, RegNum)); |
792 } | 794 } |
793 | 795 |
794 template <class Machine> | 796 template <typename TraitsType> |
795 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | 797 void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const { |
796 if (!BuildDefs::dump()) | 798 if (!BuildDefs::dump()) |
797 return; | 799 return; |
798 Ostream &Str = Ctx->getStrEmit(); | 800 Ostream &Str = Ctx->getStrEmit(); |
799 if (Var->hasReg()) { | 801 if (Var->hasReg()) { |
800 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 802 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
801 return; | 803 return; |
802 } | 804 } |
803 if (Var->mustHaveReg()) { | 805 if (Var->mustHaveReg()) { |
804 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 806 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
805 } | 807 } |
(...skipping 10 matching lines...) Expand all Loading... | |
816 if (DecorateAsm) { | 818 if (DecorateAsm) { |
817 Str << Var->getSymbolicStackOffset(Func); | 819 Str << Var->getSymbolicStackOffset(Func); |
818 } else { | 820 } else { |
819 Str << Offset; | 821 Str << Offset; |
820 } | 822 } |
821 } | 823 } |
822 const Type FrameSPTy = Traits::WordType; | 824 const Type FrameSPTy = Traits::WordType; |
823 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; | 825 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; |
824 } | 826 } |
825 | 827 |
826 template <class Machine> | 828 template <typename TraitsType> |
827 typename TargetX86Base<Machine>::Traits::Address | 829 typename TargetX86Base<TraitsType>::X86Address |
828 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 830 TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const { |
829 if (Var->hasReg()) | 831 if (Var->hasReg()) |
830 llvm_unreachable("Stack Variable has a register assigned"); | 832 llvm_unreachable("Stack Variable has a register assigned"); |
831 if (Var->mustHaveReg()) { | 833 if (Var->mustHaveReg()) { |
832 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 834 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
833 } | 835 } |
834 int32_t Offset = Var->getStackOffset(); | 836 int32_t Offset = Var->getStackOffset(); |
835 int32_t BaseRegNum = Var->getBaseRegNum(); | 837 int32_t BaseRegNum = Var->getBaseRegNum(); |
836 if (Var->getBaseRegNum() == Variable::NoRegister) | 838 if (Var->getBaseRegNum() == Variable::NoRegister) |
837 BaseRegNum = getFrameOrStackReg(); | 839 BaseRegNum = getFrameOrStackReg(); |
838 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, | 840 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset, |
839 AssemblerFixup::NoFixup); | 841 AssemblerFixup::NoFixup); |
840 } | 842 } |
841 | 843 |
842 /// Helper function for addProlog(). | 844 /// Helper function for addProlog(). |
843 /// | 845 /// |
844 /// This assumes Arg is an argument passed on the stack. This sets the frame | 846 /// This assumes Arg is an argument passed on the stack. This sets the frame |
845 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 847 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
846 /// I64 arg that has been split into Lo and Hi components, it calls itself | 848 /// I64 arg that has been split into Lo and Hi components, it calls itself |
847 /// recursively on the components, taking care to handle Lo first because of the | 849 /// recursively on the components, taking care to handle Lo first because of the |
848 /// little-endian architecture. Lastly, this function generates an instruction | 850 /// little-endian architecture. Lastly, this function generates an instruction |
849 /// to copy Arg into its assigned register if applicable. | 851 /// to copy Arg into its assigned register if applicable. |
850 template <class Machine> | 852 template <typename TraitsType> |
851 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 853 void TargetX86Base<TraitsType>::finishArgumentLowering( |
852 Variable *FramePtr, | 854 Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset, |
853 size_t BasicFrameOffset, | 855 size_t StackAdjBytes, size_t &InArgsSizeBytes) { |
854 size_t StackAdjBytes, | |
855 size_t &InArgsSizeBytes) { | |
856 if (!Traits::Is64Bit) { | 856 if (!Traits::Is64Bit) { |
857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { | 857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
858 Variable *Lo = Arg64On32->getLo(); | 858 Variable *Lo = Arg64On32->getLo(); |
859 Variable *Hi = Arg64On32->getHi(); | 859 Variable *Hi = Arg64On32->getHi(); |
860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, | 860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, |
861 InArgsSizeBytes); | 861 InArgsSizeBytes); |
862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, | 862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, |
863 InArgsSizeBytes); | 863 InArgsSizeBytes); |
864 return; | 864 return; |
865 } | 865 } |
866 } | 866 } |
867 Type Ty = Arg->getType(); | 867 Type Ty = Arg->getType(); |
868 if (isVectorType(Ty)) { | 868 if (isVectorType(Ty)) { |
869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
870 } | 870 } |
871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
873 if (Arg->hasReg()) { | 873 if (Arg->hasReg()) { |
874 assert(Ty != IceType_i64 || Traits::Is64Bit); | 874 assert(Ty != IceType_i64 || Traits::Is64Bit); |
875 auto *Mem = Traits::X86OperandMem::create( | 875 auto *Mem = X86OperandMem::create( |
876 Func, Ty, FramePtr, | 876 Func, Ty, FramePtr, |
877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); | 877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); |
878 if (isVectorType(Arg->getType())) { | 878 if (isVectorType(Arg->getType())) { |
879 _movp(Arg, Mem); | 879 _movp(Arg, Mem); |
880 } else { | 880 } else { |
881 _mov(Arg, Mem); | 881 _mov(Arg, Mem); |
882 } | 882 } |
883 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 883 // This argument-copying instruction uses an explicit X86OperandMem |
884 // operand instead of a Variable, so its fill-from-stack operation has to | 884 // operand instead of a Variable, so its fill-from-stack operation has to |
885 // be tracked separately for statistics. | 885 // be tracked separately for statistics. |
886 Ctx->statsUpdateFills(); | 886 Ctx->statsUpdateFills(); |
887 } | 887 } |
888 } | 888 } |
889 | 889 |
890 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 890 template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { |
891 return Traits::WordType; | 891 return Traits::WordType; |
892 } | 892 } |
893 | 893 |
894 template <class Machine> | 894 template <typename TraitsType> |
895 template <typename T> | 895 template <typename T> |
896 typename std::enable_if<!T::Is64Bit, Operand>::type * | 896 typename std::enable_if<!T::Is64Bit, Operand>::type * |
897 TargetX86Base<Machine>::loOperand(Operand *Operand) { | 897 TargetX86Base<TraitsType>::loOperand(Operand *Operand) { |
898 assert(Operand->getType() == IceType_i64 || | 898 assert(Operand->getType() == IceType_i64 || |
899 Operand->getType() == IceType_f64); | 899 Operand->getType() == IceType_f64); |
900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
901 return Operand; | 901 return Operand; |
902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
903 return Var64On32->getLo(); | 903 return Var64On32->getLo(); |
904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
907 // Check if we need to blind/pool the constant. | 907 // Check if we need to blind/pool the constant. |
908 return legalize(ConstInt); | 908 return legalize(ConstInt); |
909 } | 909 } |
910 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 910 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) { |
911 auto *MemOperand = Traits::X86OperandMem::create( | 911 auto *MemOperand = X86OperandMem::create( |
912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
913 Mem->getShift(), Mem->getSegmentRegister()); | 913 Mem->getShift(), Mem->getSegmentRegister()); |
914 // Test if we should randomize or pool the offset, if so randomize it or | 914 // Test if we should randomize or pool the offset, if so randomize it or |
915 // pool it then create mem operand with the blinded/pooled constant. | 915 // pool it then create mem operand with the blinded/pooled constant. |
916 // Otherwise, return the mem operand as ordinary mem operand. | 916 // Otherwise, return the mem operand as ordinary mem operand. |
917 return legalize(MemOperand); | 917 return legalize(MemOperand); |
918 } | 918 } |
919 llvm_unreachable("Unsupported operand type"); | 919 llvm_unreachable("Unsupported operand type"); |
920 return nullptr; | 920 return nullptr; |
921 } | 921 } |
922 | 922 |
923 template <class Machine> | 923 template <typename TraitsType> |
924 template <typename T> | 924 template <typename T> |
925 typename std::enable_if<!T::Is64Bit, Operand>::type * | 925 typename std::enable_if<!T::Is64Bit, Operand>::type * |
926 TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 926 TargetX86Base<TraitsType>::hiOperand(Operand *Operand) { |
927 assert(Operand->getType() == IceType_i64 || | 927 assert(Operand->getType() == IceType_i64 || |
928 Operand->getType() == IceType_f64); | 928 Operand->getType() == IceType_f64); |
929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
930 return Operand; | 930 return Operand; |
931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) | 931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
932 return Var64On32->getHi(); | 932 return Var64On32->getHi(); |
933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); | 935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); |
936 // Check if we need to blind/pool the constant. | 936 // Check if we need to blind/pool the constant. |
937 return legalize(ConstInt); | 937 return legalize(ConstInt); |
938 } | 938 } |
939 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 939 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) { |
940 Constant *Offset = Mem->getOffset(); | 940 Constant *Offset = Mem->getOffset(); |
941 if (Offset == nullptr) { | 941 if (Offset == nullptr) { |
942 Offset = Ctx->getConstantInt32(4); | 942 Offset = Ctx->getConstantInt32(4); |
943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) { | 943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) { |
944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); | 944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); |
945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) { | 945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) { |
946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); | 946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); |
947 Offset = | 947 Offset = |
948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), | 948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), |
949 SymOffset->getSuppressMangling()); | 949 SymOffset->getSuppressMangling()); |
950 } | 950 } |
951 auto *MemOperand = Traits::X86OperandMem::create( | 951 auto *MemOperand = X86OperandMem::create( |
952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), | 952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), |
953 Mem->getShift(), Mem->getSegmentRegister()); | 953 Mem->getShift(), Mem->getSegmentRegister()); |
954 // Test if the Offset is an eligible i32 constants for randomization and | 954 // Test if the Offset is an eligible i32 constants for randomization and |
955 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem | 955 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem |
956 // operand. | 956 // operand. |
957 return legalize(MemOperand); | 957 return legalize(MemOperand); |
958 } | 958 } |
959 llvm_unreachable("Unsupported operand type"); | 959 llvm_unreachable("Unsupported operand type"); |
960 return nullptr; | 960 return nullptr; |
961 } | 961 } |
962 | 962 |
963 template <class Machine> | 963 template <typename TraitsType> |
964 llvm::SmallBitVector | 964 llvm::SmallBitVector |
965 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 965 TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include, |
966 RegSetMask Exclude) const { | 966 RegSetMask Exclude) const { |
967 return Traits::getRegisterSet(Include, Exclude); | 967 return Traits::getRegisterSet(Include, Exclude); |
968 } | 968 } |
969 | 969 |
970 template <class Machine> | 970 template <typename TraitsType> |
971 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 971 void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Inst) { |
972 // Conservatively require the stack to be aligned. Some stack adjustment | 972 // Conservatively require the stack to be aligned. Some stack adjustment |
973 // operations implemented below assume that the stack is aligned before the | 973 // operations implemented below assume that the stack is aligned before the |
974 // alloca. All the alloca code ensures that the stack alignment is preserved | 974 // alloca. All the alloca code ensures that the stack alignment is preserved |
975 // after the alloca. The stack alignment restriction can be relaxed in some | 975 // after the alloca. The stack alignment restriction can be relaxed in some |
976 // cases. | 976 // cases. |
977 NeedsStackAlignment = true; | 977 NeedsStackAlignment = true; |
978 | 978 |
979 // For default align=0, set it to the real value 1, to avoid any | 979 // For default align=0, set it to the real value 1, to avoid any |
980 // bit-manipulation problems below. | 980 // bit-manipulation problems below. |
981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); | 981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1027 _mov(T, TotalSize); | 1027 _mov(T, TotalSize); |
1028 } | 1028 } |
1029 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1029 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
1030 _and(T, Ctx->getConstantInt32(-Alignment)); | 1030 _and(T, Ctx->getConstantInt32(-Alignment)); |
1031 _sub(esp, T); | 1031 _sub(esp, T); |
1032 } | 1032 } |
1033 // Add enough to the returned address to account for the out args area. | 1033 // Add enough to the returned address to account for the out args area. |
1034 uint32_t OutArgsSize = maxOutArgsSizeBytes(); | 1034 uint32_t OutArgsSize = maxOutArgsSizeBytes(); |
1035 if (OutArgsSize > 0) { | 1035 if (OutArgsSize > 0) { |
1036 Variable *T = makeReg(IceType_i32); | 1036 Variable *T = makeReg(IceType_i32); |
1037 typename Traits::X86OperandMem *CalculateOperand = | 1037 auto *CalculateOperand = X86OperandMem::create( |
1038 Traits::X86OperandMem::create( | 1038 Func, IceType_i32, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
1039 Func, IceType_i32, esp, | |
1040 Ctx->getConstantInt(IceType_i32, OutArgsSize)); | |
1041 _lea(T, CalculateOperand); | 1039 _lea(T, CalculateOperand); |
1042 _mov(Dest, T); | 1040 _mov(Dest, T); |
1043 } else { | 1041 } else { |
1044 _mov(Dest, esp); | 1042 _mov(Dest, esp); |
1045 } | 1043 } |
1046 } | 1044 } |
1047 | 1045 |
1048 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1046 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1049 /// narrower) for certain constants. The lea instruction can be used to multiply | 1047 /// narrower) for certain constants. The lea instruction can be used to multiply |
1050 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of | 1048 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of |
1051 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1049 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1052 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1050 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1053 template <class Machine> | 1051 template <typename TraitsType> |
1054 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1052 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1055 int32_t Src1) { | 1053 int32_t Src1) { |
1056 // Disable this optimization for Om1 and O0, just to keep things simple | 1054 // Disable this optimization for Om1 and O0, just to keep things simple |
1057 // there. | 1055 // there. |
1058 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1056 if (Ctx->getFlags().getOptLevel() < Opt_1) |
1059 return false; | 1057 return false; |
1060 Type Ty = Dest->getType(); | 1058 Type Ty = Dest->getType(); |
1061 Variable *T = nullptr; | 1059 Variable *T = nullptr; |
1062 if (Src1 == -1) { | 1060 if (Src1 == -1) { |
1063 _mov(T, Src0); | 1061 _mov(T, Src0); |
1064 _neg(T); | 1062 _neg(T); |
1065 _mov(Dest, T); | 1063 _mov(Dest, T); |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1112 return false; | 1110 return false; |
1113 // Limit the number of lea/shl operations for a single multiply, to a | 1111 // Limit the number of lea/shl operations for a single multiply, to a |
1114 // somewhat arbitrary choice of 3. | 1112 // somewhat arbitrary choice of 3. |
1115 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1113 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1116 if (CountOps > MaxOpsForOptimizedMul) | 1114 if (CountOps > MaxOpsForOptimizedMul) |
1117 return false; | 1115 return false; |
1118 _mov(T, Src0); | 1116 _mov(T, Src0); |
1119 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1117 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1120 for (uint32_t i = 0; i < Count9; ++i) { | 1118 for (uint32_t i = 0; i < Count9; ++i) { |
1121 constexpr uint16_t Shift = 3; // log2(9-1) | 1119 constexpr uint16_t Shift = 3; // log2(9-1) |
1122 _lea(T, | 1120 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1123 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
1124 } | 1121 } |
1125 for (uint32_t i = 0; i < Count5; ++i) { | 1122 for (uint32_t i = 0; i < Count5; ++i) { |
1126 constexpr uint16_t Shift = 2; // log2(5-1) | 1123 constexpr uint16_t Shift = 2; // log2(5-1) |
1127 _lea(T, | 1124 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1128 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
1129 } | 1125 } |
1130 for (uint32_t i = 0; i < Count3; ++i) { | 1126 for (uint32_t i = 0; i < Count3; ++i) { |
1131 constexpr uint16_t Shift = 1; // log2(3-1) | 1127 constexpr uint16_t Shift = 1; // log2(3-1) |
1132 _lea(T, | 1128 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1133 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | |
1134 } | 1129 } |
1135 if (Count2) { | 1130 if (Count2) { |
1136 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1131 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
1137 } | 1132 } |
1138 if (Src1IsNegative) | 1133 if (Src1IsNegative) |
1139 _neg(T); | 1134 _neg(T); |
1140 _mov(Dest, T); | 1135 _mov(Dest, T); |
1141 return true; | 1136 return true; |
1142 } | 1137 } |
1143 | 1138 |
1144 template <class Machine> | 1139 template <typename TraitsType> |
1145 void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op, | 1140 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op, |
1146 Operand *Src0Lo, Operand *Src0Hi, | 1141 Operand *Src0Lo, Operand *Src0Hi, |
1147 Operand *Src1Lo, Variable *DestLo, | 1142 Operand *Src1Lo, Variable *DestLo, |
1148 Variable *DestHi) { | 1143 Variable *DestHi) { |
1149 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | 1144 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
1150 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1145 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
1151 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1146 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1152 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1147 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
1153 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | 1148 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
1154 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); | 1149 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
1155 if (ShiftAmount > 32) { | 1150 if (ShiftAmount > 32) { |
1156 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); | 1151 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); |
1157 switch (Op) { | 1152 switch (Op) { |
1158 default: | 1153 default: |
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1267 } | 1262 } |
1268 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> | 1263 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> |
1269 // a.lo = t2 | 1264 // a.lo = t2 |
1270 // a.hi = t3 | 1265 // a.hi = t3 |
1271 _mov(DestLo, T_2); | 1266 _mov(DestLo, T_2); |
1272 _mov(DestHi, T_3); | 1267 _mov(DestHi, T_3); |
1273 } | 1268 } |
1274 } else { | 1269 } else { |
1275 // NON-CONSTANT CASES. | 1270 // NON-CONSTANT CASES. |
1276 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1271 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1277 typename Traits::Insts::Label *Label = | 1272 InstX86Label *Label = InstX86Label::create(Func, this); |
1278 Traits::Insts::Label::create(Func, this); | |
1279 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1273 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
1280 // t1:ecx = c.lo & 0xff | 1274 // t1:ecx = c.lo & 0xff |
1281 // t2 = b.lo | 1275 // t2 = b.lo |
1282 // t3 = b.hi | 1276 // t3 = b.hi |
1283 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); | 1277 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); |
1284 _mov(T_2, Src0Lo); | 1278 _mov(T_2, Src0Lo); |
1285 _mov(T_3, Src0Hi); | 1279 _mov(T_3, Src0Hi); |
1286 switch (Op) { | 1280 switch (Op) { |
1287 default: | 1281 default: |
1288 assert(0 && "non-shift op"); | 1282 assert(0 && "non-shift op"); |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1348 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> | 1342 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> |
1349 // L1: | 1343 // L1: |
1350 // a.lo = t2 | 1344 // a.lo = t2 |
1351 // a.hi = t3 | 1345 // a.hi = t3 |
1352 Context.insert(Label); | 1346 Context.insert(Label); |
1353 _mov(DestLo, T_2); | 1347 _mov(DestLo, T_2); |
1354 _mov(DestHi, T_3); | 1348 _mov(DestHi, T_3); |
1355 } | 1349 } |
1356 } | 1350 } |
1357 | 1351 |
1358 template <class Machine> | 1352 template <typename TraitsType> |
1359 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1353 void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { |
1360 Variable *Dest = Inst->getDest(); | 1354 Variable *Dest = Inst->getDest(); |
1361 if (Dest->isRematerializable()) { | 1355 if (Dest->isRematerializable()) { |
1362 Context.insert<InstFakeDef>(Dest); | 1356 Context.insert<InstFakeDef>(Dest); |
1363 return; | 1357 return; |
1364 } | 1358 } |
1365 Type Ty = Dest->getType(); | 1359 Type Ty = Dest->getType(); |
1366 Operand *Src0 = legalize(Inst->getSrc(0)); | 1360 Operand *Src0 = legalize(Inst->getSrc(0)); |
1367 Operand *Src1 = legalize(Inst->getSrc(1)); | 1361 Operand *Src1 = legalize(Inst->getSrc(1)); |
1368 if (Inst->isCommutative()) { | 1362 if (Inst->isCommutative()) { |
1369 uint32_t SwapCount = 0; | 1363 uint32_t SwapCount = 0; |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1501 case InstArithmetic::Srem: | 1495 case InstArithmetic::Srem: |
1502 llvm_unreachable("Call-helper-involved instruction for i64 type \ | 1496 llvm_unreachable("Call-helper-involved instruction for i64 type \ |
1503 should have already been handled before"); | 1497 should have already been handled before"); |
1504 break; | 1498 break; |
1505 } | 1499 } |
1506 return; | 1500 return; |
1507 } | 1501 } |
1508 if (isVectorType(Ty)) { | 1502 if (isVectorType(Ty)) { |
1509 // TODO: Trap on integer divide and integer modulo by zero. See: | 1503 // TODO: Trap on integer divide and integer modulo by zero. See: |
1510 // https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1504 // https://code.google.com/p/nativeclient/issues/detail?id=3899 |
1511 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) | 1505 if (llvm::isa<X86OperandMem>(Src1)) |
1512 Src1 = legalizeToReg(Src1); | 1506 Src1 = legalizeToReg(Src1); |
1513 switch (Inst->getOp()) { | 1507 switch (Inst->getOp()) { |
1514 case InstArithmetic::_num: | 1508 case InstArithmetic::_num: |
1515 llvm_unreachable("Unknown arithmetic operator"); | 1509 llvm_unreachable("Unknown arithmetic operator"); |
1516 break; | 1510 break; |
1517 case InstArithmetic::Add: { | 1511 case InstArithmetic::Add: { |
1518 Variable *T = makeReg(Ty); | 1512 Variable *T = makeReg(Ty); |
1519 _movp(T, Src0); | 1513 _movp(T, Src0); |
1520 _padd(T, Src1); | 1514 _padd(T, Src1); |
1521 _movp(Dest, T); | 1515 _movp(Dest, T); |
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1920 _mov(T, Src0); | 1914 _mov(T, Src0); |
1921 _divss(T, Src1); | 1915 _divss(T, Src1); |
1922 _mov(Dest, T); | 1916 _mov(Dest, T); |
1923 break; | 1917 break; |
1924 case InstArithmetic::Frem: | 1918 case InstArithmetic::Frem: |
1925 llvm::report_fatal_error("Helper call was expected"); | 1919 llvm::report_fatal_error("Helper call was expected"); |
1926 break; | 1920 break; |
1927 } | 1921 } |
1928 } | 1922 } |
1929 | 1923 |
1930 template <class Machine> | 1924 template <typename TraitsType> |
1931 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1925 void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Inst) { |
1932 Variable *Dest = Inst->getDest(); | 1926 Variable *Dest = Inst->getDest(); |
1933 if (Dest->isRematerializable()) { | 1927 if (Dest->isRematerializable()) { |
1934 Context.insert<InstFakeDef>(Dest); | 1928 Context.insert<InstFakeDef>(Dest); |
1935 return; | 1929 return; |
1936 } | 1930 } |
1937 Operand *Src = Inst->getSrc(0); | 1931 Operand *Src = Inst->getSrc(0); |
1938 assert(Dest->getType() == Src->getType()); | 1932 assert(Dest->getType() == Src->getType()); |
1939 lowerMove(Dest, Src, false); | 1933 lowerMove(Dest, Src, false); |
1940 } | 1934 } |
1941 | 1935 |
1942 template <class Machine> | 1936 template <typename TraitsType> |
1943 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { | 1937 void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) { |
1944 if (Br->isUnconditional()) { | 1938 if (Br->isUnconditional()) { |
1945 _br(Br->getTargetUnconditional()); | 1939 _br(Br->getTargetUnconditional()); |
1946 return; | 1940 return; |
1947 } | 1941 } |
1948 Operand *Cond = Br->getCondition(); | 1942 Operand *Cond = Br->getCondition(); |
1949 | 1943 |
1950 // Handle folding opportunities. | 1944 // Handle folding opportunities. |
1951 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 1945 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
1952 assert(Producer->isDeleted()); | 1946 assert(Producer->isDeleted()); |
1953 switch (BoolFolding::getProducerKind(Producer)) { | 1947 switch (BoolFolding::getProducerKind(Producer)) { |
(...skipping 13 matching lines...) Expand all Loading... | |
1967 return; | 1961 return; |
1968 } | 1962 } |
1969 } | 1963 } |
1970 } | 1964 } |
1971 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 1965 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
1972 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1966 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1973 _cmp(Src0, Zero); | 1967 _cmp(Src0, Zero); |
1974 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 1968 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
1975 } | 1969 } |
1976 | 1970 |
1977 template <class Machine> | 1971 template <typename TraitsType> |
1978 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 1972 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { |
1979 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1973 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
1980 InstCast::OpKind CastKind = Inst->getCastKind(); | 1974 InstCast::OpKind CastKind = Inst->getCastKind(); |
1981 Variable *Dest = Inst->getDest(); | 1975 Variable *Dest = Inst->getDest(); |
1982 Type DestTy = Dest->getType(); | 1976 Type DestTy = Dest->getType(); |
1983 switch (CastKind) { | 1977 switch (CastKind) { |
1984 default: | 1978 default: |
1985 Func->setError("Cast type not supported"); | 1979 Func->setError("Cast type not supported"); |
1986 return; | 1980 return; |
1987 case InstCast::Sext: { | 1981 case InstCast::Sext: { |
1988 // Src0RM is the source operand legalized to physical register or memory, | 1982 // Src0RM is the source operand legalized to physical register or memory, |
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2146 Variable *T = makeReg(DestTy); | 2140 Variable *T = makeReg(DestTy); |
2147 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); | 2141 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); |
2148 _mov(Dest, T); | 2142 _mov(Dest, T); |
2149 break; | 2143 break; |
2150 } | 2144 } |
2151 case InstCast::Fptosi: | 2145 case InstCast::Fptosi: |
2152 if (isVectorType(DestTy)) { | 2146 if (isVectorType(DestTy)) { |
2153 assert(DestTy == IceType_v4i32 && | 2147 assert(DestTy == IceType_v4i32 && |
2154 Inst->getSrc(0)->getType() == IceType_v4f32); | 2148 Inst->getSrc(0)->getType() == IceType_v4f32); |
2155 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2149 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2156 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2150 if (llvm::isa<X86OperandMem>(Src0RM)) |
2157 Src0RM = legalizeToReg(Src0RM); | 2151 Src0RM = legalizeToReg(Src0RM); |
2158 Variable *T = makeReg(DestTy); | 2152 Variable *T = makeReg(DestTy); |
2159 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2153 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
2160 _movp(Dest, T); | 2154 _movp(Dest, T); |
2161 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { | 2155 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
2162 llvm::report_fatal_error("Helper call was expected"); | 2156 llvm::report_fatal_error("Helper call was expected"); |
2163 } else { | 2157 } else { |
2164 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2158 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2165 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2159 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
2166 Variable *T_1 = nullptr; | 2160 Variable *T_1 = nullptr; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2212 if (DestTy == IceType_i1) | 2206 if (DestTy == IceType_i1) |
2213 _and(T_2, Ctx->getConstantInt1(1)); | 2207 _and(T_2, Ctx->getConstantInt1(1)); |
2214 _mov(Dest, T_2); | 2208 _mov(Dest, T_2); |
2215 } | 2209 } |
2216 break; | 2210 break; |
2217 case InstCast::Sitofp: | 2211 case InstCast::Sitofp: |
2218 if (isVectorType(DestTy)) { | 2212 if (isVectorType(DestTy)) { |
2219 assert(DestTy == IceType_v4f32 && | 2213 assert(DestTy == IceType_v4f32 && |
2220 Inst->getSrc(0)->getType() == IceType_v4i32); | 2214 Inst->getSrc(0)->getType() == IceType_v4i32); |
2221 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2215 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2222 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2216 if (llvm::isa<X86OperandMem>(Src0RM)) |
2223 Src0RM = legalizeToReg(Src0RM); | 2217 Src0RM = legalizeToReg(Src0RM); |
2224 Variable *T = makeReg(DestTy); | 2218 Variable *T = makeReg(DestTy); |
2225 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2219 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
2226 _movp(Dest, T); | 2220 _movp(Dest, T); |
2227 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2221 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
2228 llvm::report_fatal_error("Helper call was expected"); | 2222 llvm::report_fatal_error("Helper call was expected"); |
2229 } else { | 2223 } else { |
2230 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2224 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2231 // Sign-extend the operand. | 2225 // Sign-extend the operand. |
2232 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2226 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2297 Type SrcType = Src0RM->getType(); | 2291 Type SrcType = Src0RM->getType(); |
2298 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || | 2292 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || |
2299 (DestTy == IceType_f32 && SrcType == IceType_i32)); | 2293 (DestTy == IceType_f32 && SrcType == IceType_i32)); |
2300 // a.i32 = bitcast b.f32 ==> | 2294 // a.i32 = bitcast b.f32 ==> |
2301 // t.f32 = b.f32 | 2295 // t.f32 = b.f32 |
2302 // s.f32 = spill t.f32 | 2296 // s.f32 = spill t.f32 |
2303 // a.i32 = s.f32 | 2297 // a.i32 = s.f32 |
2304 Variable *T = nullptr; | 2298 Variable *T = nullptr; |
2305 // TODO: Should be able to force a spill setup by calling legalize() with | 2299 // TODO: Should be able to force a spill setup by calling legalize() with |
2306 // Legal_Mem and not Legal_Reg or Legal_Imm. | 2300 // Legal_Mem and not Legal_Reg or Legal_Imm. |
2307 typename Traits::SpillVariable *SpillVar = | 2301 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType); |
2308 Func->makeVariable<typename Traits::SpillVariable>(SrcType); | |
2309 SpillVar->setLinkedTo(Dest); | 2302 SpillVar->setLinkedTo(Dest); |
2310 Variable *Spill = SpillVar; | 2303 Variable *Spill = SpillVar; |
2311 Spill->setMustNotHaveReg(); | 2304 Spill->setMustNotHaveReg(); |
2312 _mov(T, Src0RM); | 2305 _mov(T, Src0RM); |
2313 _mov(Spill, T); | 2306 _mov(Spill, T); |
2314 _mov(Dest, Spill); | 2307 _mov(Dest, Spill); |
2315 } break; | 2308 } break; |
2316 case IceType_i64: { | 2309 case IceType_i64: { |
2317 assert(Src0->getType() == IceType_f64); | 2310 assert(Src0->getType() == IceType_f64); |
2318 if (Traits::Is64Bit) { | 2311 if (Traits::Is64Bit) { |
2319 Variable *Src0R = legalizeToReg(Src0); | 2312 Variable *Src0R = legalizeToReg(Src0); |
2320 Variable *T = makeReg(IceType_i64); | 2313 Variable *T = makeReg(IceType_i64); |
2321 _movd(T, Src0R); | 2314 _movd(T, Src0R); |
2322 _mov(Dest, T); | 2315 _mov(Dest, T); |
2323 } else { | 2316 } else { |
2324 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2317 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2325 // a.i64 = bitcast b.f64 ==> | 2318 // a.i64 = bitcast b.f64 ==> |
2326 // s.f64 = spill b.f64 | 2319 // s.f64 = spill b.f64 |
2327 // t_lo.i32 = lo(s.f64) | 2320 // t_lo.i32 = lo(s.f64) |
2328 // a_lo.i32 = t_lo.i32 | 2321 // a_lo.i32 = t_lo.i32 |
2329 // t_hi.i32 = hi(s.f64) | 2322 // t_hi.i32 = hi(s.f64) |
2330 // a_hi.i32 = t_hi.i32 | 2323 // a_hi.i32 = t_hi.i32 |
2331 Operand *SpillLo, *SpillHi; | 2324 Operand *SpillLo, *SpillHi; |
2332 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | 2325 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { |
2333 typename Traits::SpillVariable *SpillVar = | 2326 SpillVariable *SpillVar = |
2334 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2327 Func->makeVariable<SpillVariable>(IceType_f64); |
2335 SpillVar->setLinkedTo(Src0Var); | 2328 SpillVar->setLinkedTo(Src0Var); |
2336 Variable *Spill = SpillVar; | 2329 Variable *Spill = SpillVar; |
2337 Spill->setMustNotHaveReg(); | 2330 Spill->setMustNotHaveReg(); |
2338 _movq(Spill, Src0RM); | 2331 _movq(Spill, Src0RM); |
2339 SpillLo = Traits::VariableSplit::create(Func, Spill, | 2332 SpillLo = Traits::VariableSplit::create(Func, Spill, |
2340 Traits::VariableSplit::Low); | 2333 Traits::VariableSplit::Low); |
2341 SpillHi = Traits::VariableSplit::create(Func, Spill, | 2334 SpillHi = Traits::VariableSplit::create(Func, Spill, |
2342 Traits::VariableSplit::High); | 2335 Traits::VariableSplit::High); |
2343 } else { | 2336 } else { |
2344 SpillLo = loOperand(Src0RM); | 2337 SpillLo = loOperand(Src0RM); |
(...skipping 13 matching lines...) Expand all Loading... | |
2358 } break; | 2351 } break; |
2359 case IceType_f64: { | 2352 case IceType_f64: { |
2360 assert(Src0->getType() == IceType_i64); | 2353 assert(Src0->getType() == IceType_i64); |
2361 if (Traits::Is64Bit) { | 2354 if (Traits::Is64Bit) { |
2362 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2355 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2363 Variable *T = makeReg(IceType_f64); | 2356 Variable *T = makeReg(IceType_f64); |
2364 _movd(T, Src0RM); | 2357 _movd(T, Src0RM); |
2365 _mov(Dest, T); | 2358 _mov(Dest, T); |
2366 } else { | 2359 } else { |
2367 Src0 = legalize(Src0); | 2360 Src0 = legalize(Src0); |
2368 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | 2361 if (llvm::isa<X86OperandMem>(Src0)) { |
2369 Variable *T = Func->makeVariable(DestTy); | 2362 Variable *T = Func->makeVariable(DestTy); |
2370 _movq(T, Src0); | 2363 _movq(T, Src0); |
2371 _movq(Dest, T); | 2364 _movq(Dest, T); |
2372 break; | 2365 break; |
2373 } | 2366 } |
2374 // a.f64 = bitcast b.i64 ==> | 2367 // a.f64 = bitcast b.i64 ==> |
2375 // t_lo.i32 = b_lo.i32 | 2368 // t_lo.i32 = b_lo.i32 |
2376 // FakeDef(s.f64) | 2369 // FakeDef(s.f64) |
2377 // lo(s.f64) = t_lo.i32 | 2370 // lo(s.f64) = t_lo.i32 |
2378 // t_hi.i32 = b_hi.i32 | 2371 // t_hi.i32 = b_hi.i32 |
2379 // hi(s.f64) = t_hi.i32 | 2372 // hi(s.f64) = t_hi.i32 |
2380 // a.f64 = s.f64 | 2373 // a.f64 = s.f64 |
2381 typename Traits::SpillVariable *SpillVar = | 2374 SpillVariable *SpillVar = |
2382 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2375 Func->makeVariable<SpillVariable>(IceType_f64); |
2383 SpillVar->setLinkedTo(Dest); | 2376 SpillVar->setLinkedTo(Dest); |
2384 Variable *Spill = SpillVar; | 2377 Variable *Spill = SpillVar; |
2385 Spill->setMustNotHaveReg(); | 2378 Spill->setMustNotHaveReg(); |
2386 | 2379 |
2387 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2380 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
2388 auto *SpillLo = Traits::VariableSplit::create( | 2381 auto *SpillLo = Traits::VariableSplit::create( |
2389 Func, Spill, Traits::VariableSplit::Low); | 2382 Func, Spill, Traits::VariableSplit::Low); |
2390 auto *SpillHi = Traits::VariableSplit::create( | 2383 auto *SpillHi = Traits::VariableSplit::create( |
2391 Func, Spill, Traits::VariableSplit::High); | 2384 Func, Spill, Traits::VariableSplit::High); |
2392 _mov(T_Lo, loOperand(Src0)); | 2385 _mov(T_Lo, loOperand(Src0)); |
(...skipping 18 matching lines...) Expand all Loading... | |
2411 case IceType_v4i32: | 2404 case IceType_v4i32: |
2412 case IceType_v4f32: { | 2405 case IceType_v4f32: { |
2413 _movp(Dest, legalizeToReg(Src0)); | 2406 _movp(Dest, legalizeToReg(Src0)); |
2414 } break; | 2407 } break; |
2415 } | 2408 } |
2416 break; | 2409 break; |
2417 } | 2410 } |
2418 } | 2411 } |
2419 } | 2412 } |
2420 | 2413 |
2421 template <class Machine> | 2414 template <typename TraitsType> |
2422 void TargetX86Base<Machine>::lowerExtractElement( | 2415 void TargetX86Base<TraitsType>::lowerExtractElement( |
2423 const InstExtractElement *Inst) { | 2416 const InstExtractElement *Inst) { |
2424 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2417 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
2425 ConstantInteger32 *ElementIndex = | 2418 ConstantInteger32 *ElementIndex = |
2426 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2419 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
2427 // Only constant indices are allowed in PNaCl IR. | 2420 // Only constant indices are allowed in PNaCl IR. |
2428 assert(ElementIndex); | 2421 assert(ElementIndex); |
2429 | 2422 |
2430 unsigned Index = ElementIndex->getValue(); | 2423 unsigned Index = ElementIndex->getValue(); |
2431 Type Ty = SourceVectNotLegalized->getType(); | 2424 Type Ty = SourceVectNotLegalized->getType(); |
2432 Type ElementTy = typeElementType(Ty); | 2425 Type ElementTy = typeElementType(Ty); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2474 // Spill the value to a stack slot and do the extraction in memory. | 2467 // Spill the value to a stack slot and do the extraction in memory. |
2475 // | 2468 // |
2476 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 2469 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
2477 // for legalizing to mem is implemented. | 2470 // for legalizing to mem is implemented. |
2478 Variable *Slot = Func->makeVariable(Ty); | 2471 Variable *Slot = Func->makeVariable(Ty); |
2479 Slot->setMustNotHaveReg(); | 2472 Slot->setMustNotHaveReg(); |
2480 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 2473 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
2481 | 2474 |
2482 // Compute the location of the element in memory. | 2475 // Compute the location of the element in memory. |
2483 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 2476 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
2484 typename Traits::X86OperandMem *Loc = | 2477 X86OperandMem *Loc = |
2485 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 2478 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
2486 _mov(ExtractedElementR, Loc); | 2479 _mov(ExtractedElementR, Loc); |
2487 } | 2480 } |
2488 | 2481 |
2489 if (ElementTy == IceType_i1) { | 2482 if (ElementTy == IceType_i1) { |
2490 // Truncate extracted integers to i1s if necessary. | 2483 // Truncate extracted integers to i1s if necessary. |
2491 Variable *T = makeReg(IceType_i1); | 2484 Variable *T = makeReg(IceType_i1); |
2492 InstCast *Cast = | 2485 InstCast *Cast = |
2493 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); | 2486 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); |
2494 lowerCast(Cast); | 2487 lowerCast(Cast); |
2495 ExtractedElementR = T; | 2488 ExtractedElementR = T; |
2496 } | 2489 } |
2497 | 2490 |
2498 // Copy the element to the destination. | 2491 // Copy the element to the destination. |
2499 Variable *Dest = Inst->getDest(); | 2492 Variable *Dest = Inst->getDest(); |
2500 _mov(Dest, ExtractedElementR); | 2493 _mov(Dest, ExtractedElementR); |
2501 } | 2494 } |
2502 | 2495 |
2503 template <class Machine> | 2496 template <typename TraitsType> |
2504 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) { | 2497 void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) { |
2505 Variable *Dest = Fcmp->getDest(); | 2498 Variable *Dest = Fcmp->getDest(); |
2506 | 2499 |
2507 if (isVectorType(Dest->getType())) { | 2500 if (isVectorType(Dest->getType())) { |
2508 lowerFcmpVector(Fcmp); | 2501 lowerFcmpVector(Fcmp); |
2509 } else { | 2502 } else { |
2510 constexpr Inst *Consumer = nullptr; | 2503 constexpr Inst *Consumer = nullptr; |
2511 lowerFcmpAndConsumer(Fcmp, Consumer); | 2504 lowerFcmpAndConsumer(Fcmp, Consumer); |
2512 } | 2505 } |
2513 } | 2506 } |
2514 | 2507 |
2515 template <class Machine> | 2508 template <typename TraitsType> |
2516 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, | 2509 void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, |
2517 const Inst *Consumer) { | 2510 const Inst *Consumer) { |
2518 Operand *Src0 = Fcmp->getSrc(0); | 2511 Operand *Src0 = Fcmp->getSrc(0); |
2519 Operand *Src1 = Fcmp->getSrc(1); | 2512 Operand *Src1 = Fcmp->getSrc(1); |
2520 Variable *Dest = Fcmp->getDest(); | 2513 Variable *Dest = Fcmp->getDest(); |
2521 | 2514 |
2522 if (isVectorType(Dest->getType())) | 2515 if (isVectorType(Dest->getType())) |
2523 llvm::report_fatal_error("Vector compare/branch cannot be folded"); | 2516 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
2524 | 2517 |
2525 if (Consumer != nullptr) { | 2518 if (Consumer != nullptr) { |
2526 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2519 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
2527 if (lowerOptimizeFcmpSelect(Fcmp, Select)) | 2520 if (lowerOptimizeFcmpSelect(Fcmp, Select)) |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2559 assert(Traits::TableFcmp[Index].Default); | 2552 assert(Traits::TableFcmp[Index].Default); |
2560 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); | 2553 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); |
2561 return; | 2554 return; |
2562 } | 2555 } |
2563 } | 2556 } |
2564 int32_t IntDefault = Traits::TableFcmp[Index].Default; | 2557 int32_t IntDefault = Traits::TableFcmp[Index].Default; |
2565 if (Consumer == nullptr) { | 2558 if (Consumer == nullptr) { |
2566 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); | 2559 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); |
2567 _mov(Dest, Default); | 2560 _mov(Dest, Default); |
2568 if (HasC1) { | 2561 if (HasC1) { |
2569 typename Traits::Insts::Label *Label = | 2562 InstX86Label *Label = InstX86Label::create(Func, this); |
2570 Traits::Insts::Label::create(Func, this); | |
2571 _br(Traits::TableFcmp[Index].C1, Label); | 2563 _br(Traits::TableFcmp[Index].C1, Label); |
2572 if (HasC2) { | 2564 if (HasC2) { |
2573 _br(Traits::TableFcmp[Index].C2, Label); | 2565 _br(Traits::TableFcmp[Index].C2, Label); |
2574 } | 2566 } |
2575 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); | 2567 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); |
2576 _redefined(_mov(Dest, NonDefault)); | 2568 _redefined(_mov(Dest, NonDefault)); |
2577 Context.insert(Label); | 2569 Context.insert(Label); |
2578 } | 2570 } |
2579 return; | 2571 return; |
2580 } | 2572 } |
(...skipping 14 matching lines...) Expand all Loading... | |
2595 return; | 2587 return; |
2596 } | 2588 } |
2597 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2589 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
2598 Operand *SrcT = Select->getTrueOperand(); | 2590 Operand *SrcT = Select->getTrueOperand(); |
2599 Operand *SrcF = Select->getFalseOperand(); | 2591 Operand *SrcF = Select->getFalseOperand(); |
2600 Variable *SelectDest = Select->getDest(); | 2592 Variable *SelectDest = Select->getDest(); |
2601 if (IntDefault != 0) | 2593 if (IntDefault != 0) |
2602 std::swap(SrcT, SrcF); | 2594 std::swap(SrcT, SrcF); |
2603 lowerMove(SelectDest, SrcF, false); | 2595 lowerMove(SelectDest, SrcF, false); |
2604 if (HasC1) { | 2596 if (HasC1) { |
2605 typename Traits::Insts::Label *Label = | 2597 InstX86Label *Label = InstX86Label::create(Func, this); |
2606 Traits::Insts::Label::create(Func, this); | |
2607 _br(Traits::TableFcmp[Index].C1, Label); | 2598 _br(Traits::TableFcmp[Index].C1, Label); |
2608 if (HasC2) { | 2599 if (HasC2) { |
2609 _br(Traits::TableFcmp[Index].C2, Label); | 2600 _br(Traits::TableFcmp[Index].C2, Label); |
2610 } | 2601 } |
2611 static constexpr bool IsRedefinition = true; | 2602 static constexpr bool IsRedefinition = true; |
2612 lowerMove(SelectDest, SrcT, IsRedefinition); | 2603 lowerMove(SelectDest, SrcT, IsRedefinition); |
2613 Context.insert(Label); | 2604 Context.insert(Label); |
2614 } | 2605 } |
2615 return; | 2606 return; |
2616 } | 2607 } |
2617 llvm::report_fatal_error("Unexpected consumer type"); | 2608 llvm::report_fatal_error("Unexpected consumer type"); |
2618 } | 2609 } |
2619 | 2610 |
2620 template <class Machine> | 2611 template <typename TraitsType> |
2621 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) { | 2612 void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) { |
2622 Operand *Src0 = Fcmp->getSrc(0); | 2613 Operand *Src0 = Fcmp->getSrc(0); |
2623 Operand *Src1 = Fcmp->getSrc(1); | 2614 Operand *Src1 = Fcmp->getSrc(1); |
2624 Variable *Dest = Fcmp->getDest(); | 2615 Variable *Dest = Fcmp->getDest(); |
2625 | 2616 |
2626 if (!isVectorType(Dest->getType())) | 2617 if (!isVectorType(Dest->getType())) |
2627 llvm::report_fatal_error("Expected vector compare"); | 2618 llvm::report_fatal_error("Expected vector compare"); |
2628 | 2619 |
2629 InstFcmp::FCond Condition = Fcmp->getCondition(); | 2620 InstFcmp::FCond Condition = Fcmp->getCondition(); |
2630 size_t Index = static_cast<size_t>(Condition); | 2621 size_t Index = static_cast<size_t>(Condition); |
2631 assert(Index < Traits::TableFcmpSize); | 2622 assert(Index < Traits::TableFcmpSize); |
2632 | 2623 |
2633 if (Traits::TableFcmp[Index].SwapVectorOperands) | 2624 if (Traits::TableFcmp[Index].SwapVectorOperands) |
2634 std::swap(Src0, Src1); | 2625 std::swap(Src0, Src1); |
2635 | 2626 |
2636 Variable *T = nullptr; | 2627 Variable *T = nullptr; |
2637 | 2628 |
2638 if (Condition == InstFcmp::True) { | 2629 if (Condition == InstFcmp::True) { |
2639 // makeVectorOfOnes() requires an integer vector type. | 2630 // makeVectorOfOnes() requires an integer vector type. |
2640 T = makeVectorOfMinusOnes(IceType_v4i32); | 2631 T = makeVectorOfMinusOnes(IceType_v4i32); |
2641 } else if (Condition == InstFcmp::False) { | 2632 } else if (Condition == InstFcmp::False) { |
2642 T = makeVectorOfZeros(Dest->getType()); | 2633 T = makeVectorOfZeros(Dest->getType()); |
2643 } else { | 2634 } else { |
2644 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2635 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2645 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2636 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2646 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2637 if (llvm::isa<X86OperandMem>(Src1RM)) |
2647 Src1RM = legalizeToReg(Src1RM); | 2638 Src1RM = legalizeToReg(Src1RM); |
2648 | 2639 |
2649 switch (Condition) { | 2640 switch (Condition) { |
2650 default: { | 2641 default: { |
2651 typename Traits::Cond::CmppsCond Predicate = | 2642 CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; |
2652 Traits::TableFcmp[Index].Predicate; | |
2653 assert(Predicate != Traits::Cond::Cmpps_Invalid); | 2643 assert(Predicate != Traits::Cond::Cmpps_Invalid); |
2654 T = makeReg(Src0RM->getType()); | 2644 T = makeReg(Src0RM->getType()); |
2655 _movp(T, Src0RM); | 2645 _movp(T, Src0RM); |
2656 _cmpps(T, Src1RM, Predicate); | 2646 _cmpps(T, Src1RM, Predicate); |
2657 } break; | 2647 } break; |
2658 case InstFcmp::One: { | 2648 case InstFcmp::One: { |
2659 // Check both unequal and ordered. | 2649 // Check both unequal and ordered. |
2660 T = makeReg(Src0RM->getType()); | 2650 T = makeReg(Src0RM->getType()); |
2661 Variable *T2 = makeReg(Src0RM->getType()); | 2651 Variable *T2 = makeReg(Src0RM->getType()); |
2662 _movp(T, Src0RM); | 2652 _movp(T, Src0RM); |
(...skipping 21 matching lines...) Expand all Loading... | |
2684 } | 2674 } |
2685 | 2675 |
2686 inline bool isZero(const Operand *Opnd) { | 2676 inline bool isZero(const Operand *Opnd) { |
2687 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) | 2677 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
2688 return C64->getValue() == 0; | 2678 return C64->getValue() == 0; |
2689 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 2679 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
2690 return C32->getValue() == 0; | 2680 return C32->getValue() == 0; |
2691 return false; | 2681 return false; |
2692 } | 2682 } |
2693 | 2683 |
2694 template <class Machine> | 2684 template <typename TraitsType> |
2695 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp, | 2685 void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp, |
2696 const Inst *Consumer) { | 2686 const Inst *Consumer) { |
2697 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2687 Operand *Src0 = legalize(Icmp->getSrc(0)); |
2698 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2688 Operand *Src1 = legalize(Icmp->getSrc(1)); |
2699 Variable *Dest = Icmp->getDest(); | 2689 Variable *Dest = Icmp->getDest(); |
2700 | 2690 |
2701 if (isVectorType(Dest->getType())) | 2691 if (isVectorType(Dest->getType())) |
2702 llvm::report_fatal_error("Vector compare/branch cannot be folded"); | 2692 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
2703 | 2693 |
2704 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { | 2694 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
2705 lowerIcmp64(Icmp, Consumer); | 2695 lowerIcmp64(Icmp, Consumer); |
2706 return; | 2696 return; |
(...skipping 11 matching lines...) Expand all Loading... | |
2718 movOrConsumer(false, Dest, Consumer); | 2708 movOrConsumer(false, Dest, Consumer); |
2719 return; | 2709 return; |
2720 } | 2710 } |
2721 } | 2711 } |
2722 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2712 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
2723 _cmp(Src0RM, Src1); | 2713 _cmp(Src0RM, Src1); |
2724 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, | 2714 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, |
2725 Consumer); | 2715 Consumer); |
2726 } | 2716 } |
2727 | 2717 |
2728 template <class Machine> | 2718 template <typename TraitsType> |
2729 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) { | 2719 void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) { |
2730 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2720 Operand *Src0 = legalize(Icmp->getSrc(0)); |
2731 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2721 Operand *Src1 = legalize(Icmp->getSrc(1)); |
2732 Variable *Dest = Icmp->getDest(); | 2722 Variable *Dest = Icmp->getDest(); |
2733 | 2723 |
2734 if (!isVectorType(Dest->getType())) | 2724 if (!isVectorType(Dest->getType())) |
2735 llvm::report_fatal_error("Expected a vector compare"); | 2725 llvm::report_fatal_error("Expected a vector compare"); |
2736 | 2726 |
2737 Type Ty = Src0->getType(); | 2727 Type Ty = Src0->getType(); |
2738 // Promote i1 vectors to 128 bit integer vector types. | 2728 // Promote i1 vectors to 128 bit integer vector types. |
2739 if (typeElementType(Ty) == IceType_i1) { | 2729 if (typeElementType(Ty) == IceType_i1) { |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2781 Src0RM = T0; | 2771 Src0RM = T0; |
2782 Src1RM = T1; | 2772 Src1RM = T1; |
2783 } | 2773 } |
2784 | 2774 |
2785 Variable *T = makeReg(Ty); | 2775 Variable *T = makeReg(Ty); |
2786 switch (Condition) { | 2776 switch (Condition) { |
2787 default: | 2777 default: |
2788 llvm_unreachable("unexpected condition"); | 2778 llvm_unreachable("unexpected condition"); |
2789 break; | 2779 break; |
2790 case InstIcmp::Eq: { | 2780 case InstIcmp::Eq: { |
2791 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2781 if (llvm::isa<X86OperandMem>(Src1RM)) |
2792 Src1RM = legalizeToReg(Src1RM); | 2782 Src1RM = legalizeToReg(Src1RM); |
2793 _movp(T, Src0RM); | 2783 _movp(T, Src0RM); |
2794 _pcmpeq(T, Src1RM); | 2784 _pcmpeq(T, Src1RM); |
2795 } break; | 2785 } break; |
2796 case InstIcmp::Ne: { | 2786 case InstIcmp::Ne: { |
2797 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2787 if (llvm::isa<X86OperandMem>(Src1RM)) |
2798 Src1RM = legalizeToReg(Src1RM); | 2788 Src1RM = legalizeToReg(Src1RM); |
2799 _movp(T, Src0RM); | 2789 _movp(T, Src0RM); |
2800 _pcmpeq(T, Src1RM); | 2790 _pcmpeq(T, Src1RM); |
2801 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2791 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2802 _pxor(T, MinusOne); | 2792 _pxor(T, MinusOne); |
2803 } break; | 2793 } break; |
2804 case InstIcmp::Ugt: | 2794 case InstIcmp::Ugt: |
2805 case InstIcmp::Sgt: { | 2795 case InstIcmp::Sgt: { |
2806 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2796 if (llvm::isa<X86OperandMem>(Src1RM)) |
2807 Src1RM = legalizeToReg(Src1RM); | 2797 Src1RM = legalizeToReg(Src1RM); |
2808 _movp(T, Src0RM); | 2798 _movp(T, Src0RM); |
2809 _pcmpgt(T, Src1RM); | 2799 _pcmpgt(T, Src1RM); |
2810 } break; | 2800 } break; |
2811 case InstIcmp::Uge: | 2801 case InstIcmp::Uge: |
2812 case InstIcmp::Sge: { | 2802 case InstIcmp::Sge: { |
2813 // !(Src1RM > Src0RM) | 2803 // !(Src1RM > Src0RM) |
2814 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2804 if (llvm::isa<X86OperandMem>(Src0RM)) |
2815 Src0RM = legalizeToReg(Src0RM); | 2805 Src0RM = legalizeToReg(Src0RM); |
2816 _movp(T, Src1RM); | 2806 _movp(T, Src1RM); |
2817 _pcmpgt(T, Src0RM); | 2807 _pcmpgt(T, Src0RM); |
2818 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2808 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2819 _pxor(T, MinusOne); | 2809 _pxor(T, MinusOne); |
2820 } break; | 2810 } break; |
2821 case InstIcmp::Ult: | 2811 case InstIcmp::Ult: |
2822 case InstIcmp::Slt: { | 2812 case InstIcmp::Slt: { |
2823 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2813 if (llvm::isa<X86OperandMem>(Src0RM)) |
2824 Src0RM = legalizeToReg(Src0RM); | 2814 Src0RM = legalizeToReg(Src0RM); |
2825 _movp(T, Src1RM); | 2815 _movp(T, Src1RM); |
2826 _pcmpgt(T, Src0RM); | 2816 _pcmpgt(T, Src0RM); |
2827 } break; | 2817 } break; |
2828 case InstIcmp::Ule: | 2818 case InstIcmp::Ule: |
2829 case InstIcmp::Sle: { | 2819 case InstIcmp::Sle: { |
2830 // !(Src0RM > Src1RM) | 2820 // !(Src0RM > Src1RM) |
2831 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | 2821 if (llvm::isa<X86OperandMem>(Src1RM)) |
2832 Src1RM = legalizeToReg(Src1RM); | 2822 Src1RM = legalizeToReg(Src1RM); |
2833 _movp(T, Src0RM); | 2823 _movp(T, Src0RM); |
2834 _pcmpgt(T, Src1RM); | 2824 _pcmpgt(T, Src1RM); |
2835 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2825 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2836 _pxor(T, MinusOne); | 2826 _pxor(T, MinusOne); |
2837 } break; | 2827 } break; |
2838 } | 2828 } |
2839 | 2829 |
2840 _movp(Dest, T); | 2830 _movp(Dest, T); |
2841 eliminateNextVectorSextInstruction(Dest); | 2831 eliminateNextVectorSextInstruction(Dest); |
2842 } | 2832 } |
2843 | 2833 |
2844 template <typename Machine> | 2834 template <typename TraitsType> |
2845 template <typename T> | 2835 template <typename T> |
2846 typename std::enable_if<!T::Is64Bit, void>::type | 2836 typename std::enable_if<!T::Is64Bit, void>::type |
2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, | 2837 TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp, |
2848 const Inst *Consumer) { | 2838 const Inst *Consumer) { |
2849 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2839 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
2850 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2840 Operand *Src0 = legalize(Icmp->getSrc(0)); |
2851 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2841 Operand *Src1 = legalize(Icmp->getSrc(1)); |
2852 Variable *Dest = Icmp->getDest(); | 2842 Variable *Dest = Icmp->getDest(); |
2853 InstIcmp::ICond Condition = Icmp->getCondition(); | 2843 InstIcmp::ICond Condition = Icmp->getCondition(); |
2854 size_t Index = static_cast<size_t>(Condition); | 2844 size_t Index = static_cast<size_t>(Condition); |
2855 assert(Index < Traits::TableIcmp64Size); | 2845 assert(Index < Traits::TableIcmp64Size); |
2856 Operand *Src0LoRM = nullptr; | 2846 Operand *Src0LoRM = nullptr; |
2857 Operand *Src0HiRM = nullptr; | 2847 Operand *Src0HiRM = nullptr; |
2858 // Legalize the portions of Src0 that are going to be needed. | 2848 // Legalize the portions of Src0 that are going to be needed. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2933 case InstIcmp::Sle: | 2923 case InstIcmp::Sle: |
2934 break; | 2924 break; |
2935 } | 2925 } |
2936 } | 2926 } |
2937 // Handle general compares. | 2927 // Handle general compares. |
2938 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 2928 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
2939 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 2929 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
2940 if (Consumer == nullptr) { | 2930 if (Consumer == nullptr) { |
2941 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); | 2931 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); |
2942 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); | 2932 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); |
2943 typename Traits::Insts::Label *LabelFalse = | 2933 InstX86Label *LabelFalse = InstX86Label::create(Func, this); |
2944 Traits::Insts::Label::create(Func, this); | 2934 InstX86Label *LabelTrue = InstX86Label::create(Func, this); |
2945 typename Traits::Insts::Label *LabelTrue = | |
2946 Traits::Insts::Label::create(Func, this); | |
2947 _mov(Dest, One); | 2935 _mov(Dest, One); |
2948 _cmp(Src0HiRM, Src1HiRI); | 2936 _cmp(Src0HiRM, Src1HiRI); |
2949 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2937 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
2950 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 2938 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
2951 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2939 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
2952 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 2940 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
2953 _cmp(Src0LoRM, Src1LoRI); | 2941 _cmp(Src0LoRM, Src1LoRI); |
2954 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 2942 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
2955 Context.insert(LabelFalse); | 2943 Context.insert(LabelFalse); |
2956 _redefined(_mov(Dest, Zero)); | 2944 _redefined(_mov(Dest, Zero)); |
2957 Context.insert(LabelTrue); | 2945 Context.insert(LabelTrue); |
2958 return; | 2946 return; |
2959 } | 2947 } |
2960 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 2948 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
2961 _cmp(Src0HiRM, Src1HiRI); | 2949 _cmp(Src0HiRM, Src1HiRI); |
2962 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2950 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
2963 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); | 2951 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); |
2964 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2952 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
2965 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); | 2953 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); |
2966 _cmp(Src0LoRM, Src1LoRI); | 2954 _cmp(Src0LoRM, Src1LoRI); |
2967 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), | 2955 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), |
2968 Br->getTargetFalse()); | 2956 Br->getTargetFalse()); |
2969 return; | 2957 return; |
2970 } | 2958 } |
2971 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2959 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
2972 Operand *SrcT = Select->getTrueOperand(); | 2960 Operand *SrcT = Select->getTrueOperand(); |
2973 Operand *SrcF = Select->getFalseOperand(); | 2961 Operand *SrcF = Select->getFalseOperand(); |
2974 Variable *SelectDest = Select->getDest(); | 2962 Variable *SelectDest = Select->getDest(); |
2975 typename Traits::Insts::Label *LabelFalse = | 2963 InstX86Label *LabelFalse = InstX86Label::create(Func, this); |
2976 Traits::Insts::Label::create(Func, this); | 2964 InstX86Label *LabelTrue = InstX86Label::create(Func, this); |
2977 typename Traits::Insts::Label *LabelTrue = | |
2978 Traits::Insts::Label::create(Func, this); | |
2979 lowerMove(SelectDest, SrcT, false); | 2965 lowerMove(SelectDest, SrcT, false); |
2980 _cmp(Src0HiRM, Src1HiRI); | 2966 _cmp(Src0HiRM, Src1HiRI); |
2981 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2967 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
2982 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 2968 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
2983 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2969 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
2984 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 2970 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
2985 _cmp(Src0LoRM, Src1LoRI); | 2971 _cmp(Src0LoRM, Src1LoRI); |
2986 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 2972 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
2987 Context.insert(LabelFalse); | 2973 Context.insert(LabelFalse); |
2988 static constexpr bool IsRedefinition = true; | 2974 static constexpr bool IsRedefinition = true; |
2989 lowerMove(SelectDest, SrcF, IsRedefinition); | 2975 lowerMove(SelectDest, SrcF, IsRedefinition); |
2990 Context.insert(LabelTrue); | 2976 Context.insert(LabelTrue); |
2991 return; | 2977 return; |
2992 } | 2978 } |
2993 llvm::report_fatal_error("Unexpected consumer type"); | 2979 llvm::report_fatal_error("Unexpected consumer type"); |
2994 } | 2980 } |
2995 | 2981 |
2996 template <class Machine> | 2982 template <typename TraitsType> |
2997 void TargetX86Base<Machine>::setccOrConsumer( | 2983 void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition, |
2998 typename Traits::Cond::BrCond Condition, Variable *Dest, | 2984 Variable *Dest, |
2999 const Inst *Consumer) { | 2985 const Inst *Consumer) { |
3000 if (Consumer == nullptr) { | 2986 if (Consumer == nullptr) { |
3001 _setcc(Dest, Condition); | 2987 _setcc(Dest, Condition); |
3002 return; | 2988 return; |
3003 } | 2989 } |
3004 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 2990 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
3005 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); | 2991 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); |
3006 return; | 2992 return; |
3007 } | 2993 } |
3008 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 2994 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
3009 Operand *SrcT = Select->getTrueOperand(); | 2995 Operand *SrcT = Select->getTrueOperand(); |
3010 Operand *SrcF = Select->getFalseOperand(); | 2996 Operand *SrcF = Select->getFalseOperand(); |
3011 Variable *SelectDest = Select->getDest(); | 2997 Variable *SelectDest = Select->getDest(); |
3012 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); | 2998 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); |
3013 return; | 2999 return; |
3014 } | 3000 } |
3015 llvm::report_fatal_error("Unexpected consumer type"); | 3001 llvm::report_fatal_error("Unexpected consumer type"); |
3016 } | 3002 } |
3017 | 3003 |
3018 template <class Machine> | 3004 template <typename TraitsType> |
3019 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest, | 3005 void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest, |
3020 const Inst *Consumer) { | 3006 const Inst *Consumer) { |
3021 if (Consumer == nullptr) { | 3007 if (Consumer == nullptr) { |
3022 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3008 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
3023 return; | 3009 return; |
3024 } | 3010 } |
3025 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3011 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
3026 // TODO(sehr,stichnot): This could be done with a single unconditional | 3012 // TODO(sehr,stichnot): This could be done with a single unconditional |
3027 // branch instruction, but subzero doesn't know how to handle the resulting | 3013 // branch instruction, but subzero doesn't know how to handle the resulting |
3028 // control flow graph changes now. Make it do so to eliminate mov and cmp. | 3014 // control flow graph changes now. Make it do so to eliminate mov and cmp. |
3029 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3015 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
3030 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); | 3016 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); |
3031 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3017 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
3032 return; | 3018 return; |
3033 } | 3019 } |
3034 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { | 3020 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
3035 Operand *Src = nullptr; | 3021 Operand *Src = nullptr; |
3036 if (IcmpResult) { | 3022 if (IcmpResult) { |
3037 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); | 3023 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); |
3038 } else { | 3024 } else { |
3039 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); | 3025 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); |
3040 } | 3026 } |
3041 Variable *SelectDest = Select->getDest(); | 3027 Variable *SelectDest = Select->getDest(); |
3042 lowerMove(SelectDest, Src, false); | 3028 lowerMove(SelectDest, Src, false); |
3043 return; | 3029 return; |
3044 } | 3030 } |
3045 llvm::report_fatal_error("Unexpected consumer type"); | 3031 llvm::report_fatal_error("Unexpected consumer type"); |
3046 } | 3032 } |
3047 | 3033 |
3048 template <class Machine> | 3034 template <typename TraitsType> |
3049 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith, | 3035 void TargetX86Base<TraitsType>::lowerArithAndConsumer( |
3050 const Inst *Consumer) { | 3036 const InstArithmetic *Arith, const Inst *Consumer) { |
3051 Variable *T = nullptr; | 3037 Variable *T = nullptr; |
3052 Operand *Src0 = legalize(Arith->getSrc(0)); | 3038 Operand *Src0 = legalize(Arith->getSrc(0)); |
3053 Operand *Src1 = legalize(Arith->getSrc(1)); | 3039 Operand *Src1 = legalize(Arith->getSrc(1)); |
3054 Variable *Dest = Arith->getDest(); | 3040 Variable *Dest = Arith->getDest(); |
3055 switch (Arith->getOp()) { | 3041 switch (Arith->getOp()) { |
3056 default: | 3042 default: |
3057 llvm_unreachable("arithmetic operator not AND or OR"); | 3043 llvm_unreachable("arithmetic operator not AND or OR"); |
3058 break; | 3044 break; |
3059 case InstArithmetic::And: | 3045 case InstArithmetic::And: |
3060 _mov(T, Src0); | 3046 _mov(T, Src0); |
(...skipping 16 matching lines...) Expand all Loading... | |
3077 } | 3063 } |
3078 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { | 3064 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
3079 Context.insert<InstFakeUse>(T); | 3065 Context.insert<InstFakeUse>(T); |
3080 Context.insert<InstFakeDef>(Dest); | 3066 Context.insert<InstFakeDef>(Dest); |
3081 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3067 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
3082 return; | 3068 return; |
3083 } | 3069 } |
3084 llvm::report_fatal_error("Unexpected consumer type"); | 3070 llvm::report_fatal_error("Unexpected consumer type"); |
3085 } | 3071 } |
3086 | 3072 |
3087 template <class Machine> | 3073 template <typename TraitsType> |
3088 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3074 void TargetX86Base<TraitsType>::lowerInsertElement( |
3075 const InstInsertElement *Inst) { | |
3089 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3076 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
3090 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3077 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
3091 ConstantInteger32 *ElementIndex = | 3078 ConstantInteger32 *ElementIndex = |
3092 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3079 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
3093 // Only constant indices are allowed in PNaCl IR. | 3080 // Only constant indices are allowed in PNaCl IR. |
3094 assert(ElementIndex); | 3081 assert(ElementIndex); |
3095 unsigned Index = ElementIndex->getValue(); | 3082 unsigned Index = ElementIndex->getValue(); |
3096 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 3083 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
3097 | 3084 |
3098 Type Ty = SourceVectNotLegalized->getType(); | 3085 Type Ty = SourceVectNotLegalized->getType(); |
(...skipping 17 matching lines...) Expand all Loading... | |
3116 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3103 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
3117 Operand *SourceVectRM = | 3104 Operand *SourceVectRM = |
3118 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3105 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
3119 Variable *T = makeReg(Ty); | 3106 Variable *T = makeReg(Ty); |
3120 _movp(T, SourceVectRM); | 3107 _movp(T, SourceVectRM); |
3121 if (Ty == IceType_v4f32) { | 3108 if (Ty == IceType_v4f32) { |
3122 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3109 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
3123 } else { | 3110 } else { |
3124 // For the pinsrb and pinsrw instructions, when the source operand is a | 3111 // For the pinsrb and pinsrw instructions, when the source operand is a |
3125 // register, it must be a full r32 register like eax, and not ax/al/ah. | 3112 // register, it must be a full r32 register like eax, and not ax/al/ah. |
3126 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use | 3113 // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for |
3114 // the use | |
3127 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() | 3115 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() |
3128 // validates that the original and base register encodings are the same. | 3116 // validates that the original and base register encodings are the same. |
3129 if (ElementRM->getType() == IceType_i8 && | 3117 if (ElementRM->getType() == IceType_i8 && |
3130 llvm::isa<Variable>(ElementRM)) { | 3118 llvm::isa<Variable>(ElementRM)) { |
3131 // Don't use ah/bh/ch/dh for pinsrb. | 3119 // Don't use ah/bh/ch/dh for pinsrb. |
3132 ElementRM = copyToReg8(ElementRM); | 3120 ElementRM = copyToReg8(ElementRM); |
3133 } | 3121 } |
3134 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3122 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
3135 } | 3123 } |
3136 _movp(Inst->getDest(), T); | 3124 _movp(Inst->getDest(), T); |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3201 // Spill the value to a stack slot and perform the insertion in memory. | 3189 // Spill the value to a stack slot and perform the insertion in memory. |
3202 // | 3190 // |
3203 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support | 3191 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
3204 // for legalizing to mem is implemented. | 3192 // for legalizing to mem is implemented. |
3205 Variable *Slot = Func->makeVariable(Ty); | 3193 Variable *Slot = Func->makeVariable(Ty); |
3206 Slot->setMustNotHaveReg(); | 3194 Slot->setMustNotHaveReg(); |
3207 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 3195 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
3208 | 3196 |
3209 // Compute the location of the position to insert in memory. | 3197 // Compute the location of the position to insert in memory. |
3210 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 3198 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
3211 typename Traits::X86OperandMem *Loc = | 3199 X86OperandMem *Loc = |
3212 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3200 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
3213 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); | 3201 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); |
3214 | 3202 |
3215 Variable *T = makeReg(Ty); | 3203 Variable *T = makeReg(Ty); |
3216 _movp(T, Slot); | 3204 _movp(T, Slot); |
3217 _movp(Inst->getDest(), T); | 3205 _movp(Inst->getDest(), T); |
3218 } | 3206 } |
3219 } | 3207 } |
3220 | 3208 |
3221 template <class Machine> | 3209 template <typename TraitsType> |
3222 void TargetX86Base<Machine>::lowerIntrinsicCall( | 3210 void TargetX86Base<TraitsType>::lowerIntrinsicCall( |
3223 const InstIntrinsicCall *Instr) { | 3211 const InstIntrinsicCall *Instr) { |
3224 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3212 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
3225 case Intrinsics::AtomicCmpxchg: { | 3213 case Intrinsics::AtomicCmpxchg: { |
3226 if (!Intrinsics::isMemoryOrderValid( | 3214 if (!Intrinsics::isMemoryOrderValid( |
3227 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3215 ID, getConstantMemoryOrder(Instr->getArg(3)), |
3228 getConstantMemoryOrder(Instr->getArg(4)))) { | 3216 getConstantMemoryOrder(Instr->getArg(4)))) { |
3229 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3217 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
3230 return; | 3218 return; |
3231 } | 3219 } |
3232 Variable *DestPrev = Instr->getDest(); | 3220 Variable *DestPrev = Instr->getDest(); |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3292 } | 3280 } |
3293 Variable *Dest = Instr->getDest(); | 3281 Variable *Dest = Instr->getDest(); |
3294 if (!Traits::Is64Bit) { | 3282 if (!Traits::Is64Bit) { |
3295 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { | 3283 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { |
3296 // Follow what GCC does and use a movq instead of what lowerLoad() | 3284 // Follow what GCC does and use a movq instead of what lowerLoad() |
3297 // normally does (split the load into two). Thus, this skips | 3285 // normally does (split the load into two). Thus, this skips |
3298 // load/arithmetic op folding. Load/arithmetic folding can't happen | 3286 // load/arithmetic op folding. Load/arithmetic folding can't happen |
3299 // anyway, since this is x86-32 and integer arithmetic only happens on | 3287 // anyway, since this is x86-32 and integer arithmetic only happens on |
3300 // 32-bit quantities. | 3288 // 32-bit quantities. |
3301 Variable *T = makeReg(IceType_f64); | 3289 Variable *T = makeReg(IceType_f64); |
3302 typename Traits::X86OperandMem *Addr = | 3290 X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); |
3303 formMemoryOperand(Instr->getArg(0), IceType_f64); | |
3304 _movq(T, Addr); | 3291 _movq(T, Addr); |
3305 // Then cast the bits back out of the XMM register to the i64 Dest. | 3292 // Then cast the bits back out of the XMM register to the i64 Dest. |
3306 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3293 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
3307 lowerCast(Cast); | 3294 lowerCast(Cast); |
3308 // Make sure that the atomic load isn't elided when unused. | 3295 // Make sure that the atomic load isn't elided when unused. |
3309 Context.insert<InstFakeUse>(Dest64On32->getLo()); | 3296 Context.insert<InstFakeUse>(Dest64On32->getLo()); |
3310 Context.insert<InstFakeUse>(Dest64On32->getHi()); | 3297 Context.insert<InstFakeUse>(Dest64On32->getHi()); |
3311 return; | 3298 return; |
3312 } | 3299 } |
3313 } | 3300 } |
(...skipping 29 matching lines...) Expand all Loading... | |
3343 Operand *Value = Instr->getArg(0); | 3330 Operand *Value = Instr->getArg(0); |
3344 Operand *Ptr = Instr->getArg(1); | 3331 Operand *Ptr = Instr->getArg(1); |
3345 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { | 3332 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
3346 // Use a movq instead of what lowerStore() normally does (split the store | 3333 // Use a movq instead of what lowerStore() normally does (split the store |
3347 // into two), following what GCC does. Cast the bits from int -> to an | 3334 // into two), following what GCC does. Cast the bits from int -> to an |
3348 // xmm register first. | 3335 // xmm register first. |
3349 Variable *T = makeReg(IceType_f64); | 3336 Variable *T = makeReg(IceType_f64); |
3350 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | 3337 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
3351 lowerCast(Cast); | 3338 lowerCast(Cast); |
3352 // Then store XMM w/ a movq. | 3339 // Then store XMM w/ a movq. |
3353 typename Traits::X86OperandMem *Addr = | 3340 X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64); |
3354 formMemoryOperand(Ptr, IceType_f64); | |
3355 _storeq(T, Addr); | 3341 _storeq(T, Addr); |
3356 _mfence(); | 3342 _mfence(); |
3357 return; | 3343 return; |
3358 } | 3344 } |
3359 auto *Store = InstStore::create(Func, Value, Ptr); | 3345 auto *Store = InstStore::create(Func, Value, Ptr); |
3360 lowerStore(Store); | 3346 lowerStore(Store); |
3361 _mfence(); | 3347 _mfence(); |
3362 return; | 3348 return; |
3363 } | 3349 } |
3364 case Intrinsics::Bswap: { | 3350 case Intrinsics::Bswap: { |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3488 return; | 3474 return; |
3489 } | 3475 } |
3490 case Intrinsics::Fabs: { | 3476 case Intrinsics::Fabs: { |
3491 Operand *Src = legalize(Instr->getArg(0)); | 3477 Operand *Src = legalize(Instr->getArg(0)); |
3492 Type Ty = Src->getType(); | 3478 Type Ty = Src->getType(); |
3493 Variable *Dest = Instr->getDest(); | 3479 Variable *Dest = Instr->getDest(); |
3494 Variable *T = makeVectorOfFabsMask(Ty); | 3480 Variable *T = makeVectorOfFabsMask(Ty); |
3495 // The pand instruction operates on an m128 memory operand, so if Src is an | 3481 // The pand instruction operates on an m128 memory operand, so if Src is an |
3496 // f32 or f64, we need to make sure it's in a register. | 3482 // f32 or f64, we need to make sure it's in a register. |
3497 if (isVectorType(Ty)) { | 3483 if (isVectorType(Ty)) { |
3498 if (llvm::isa<typename Traits::X86OperandMem>(Src)) | 3484 if (llvm::isa<X86OperandMem>(Src)) |
3499 Src = legalizeToReg(Src); | 3485 Src = legalizeToReg(Src); |
3500 } else { | 3486 } else { |
3501 Src = legalizeToReg(Src); | 3487 Src = legalizeToReg(Src); |
3502 } | 3488 } |
3503 _pand(T, Src); | 3489 _pand(T, Src); |
3504 if (isVectorType(Ty)) | 3490 if (isVectorType(Ty)) |
3505 _movp(Dest, T); | 3491 _movp(Dest, T); |
3506 else | 3492 else |
3507 _mov(Dest, T); | 3493 _mov(Dest, T); |
3508 return; | 3494 return; |
(...skipping 12 matching lines...) Expand all Loading... | |
3521 case Intrinsics::Memmove: { | 3507 case Intrinsics::Memmove: { |
3522 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); | 3508 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
3523 return; | 3509 return; |
3524 } | 3510 } |
3525 case Intrinsics::Memset: { | 3511 case Intrinsics::Memset: { |
3526 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); | 3512 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
3527 return; | 3513 return; |
3528 } | 3514 } |
3529 case Intrinsics::NaClReadTP: { | 3515 case Intrinsics::NaClReadTP: { |
3530 if (Ctx->getFlags().getUseSandboxing()) { | 3516 if (Ctx->getFlags().getUseSandboxing()) { |
3531 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); | 3517 Operand *Src = |
3518 dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand); | |
3532 Variable *Dest = Instr->getDest(); | 3519 Variable *Dest = Instr->getDest(); |
3533 Variable *T = nullptr; | 3520 Variable *T = nullptr; |
3534 _mov(T, Src); | 3521 _mov(T, Src); |
3535 _mov(Dest, T); | 3522 _mov(Dest, T); |
3536 } else { | 3523 } else { |
3537 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); | 3524 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); |
3538 lowerCall(Call); | 3525 lowerCall(Call); |
3539 } | 3526 } |
3540 return; | 3527 return; |
3541 } | 3528 } |
(...skipping 29 matching lines...) Expand all Loading... | |
3571 case Intrinsics::Trap: | 3558 case Intrinsics::Trap: |
3572 _ud2(); | 3559 _ud2(); |
3573 return; | 3560 return; |
3574 case Intrinsics::UnknownIntrinsic: | 3561 case Intrinsics::UnknownIntrinsic: |
3575 Func->setError("Should not be lowering UnknownIntrinsic"); | 3562 Func->setError("Should not be lowering UnknownIntrinsic"); |
3576 return; | 3563 return; |
3577 } | 3564 } |
3578 return; | 3565 return; |
3579 } | 3566 } |
3580 | 3567 |
3581 template <class Machine> | 3568 template <typename TraitsType> |
3582 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3569 void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev, |
3583 Operand *Ptr, Operand *Expected, | 3570 Operand *Ptr, |
3584 Operand *Desired) { | 3571 Operand *Expected, |
3572 Operand *Desired) { | |
3585 Type Ty = Expected->getType(); | 3573 Type Ty = Expected->getType(); |
3586 if (!Traits::Is64Bit && Ty == IceType_i64) { | 3574 if (!Traits::Is64Bit && Ty == IceType_i64) { |
3587 // Reserve the pre-colored registers first, before adding any more | 3575 // Reserve the pre-colored registers first, before adding any more |
3588 // infinite-weight variables from formMemoryOperand's legalization. | 3576 // infinite-weight variables from formMemoryOperand's legalization. |
3589 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3577 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
3590 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3578 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
3591 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3579 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
3592 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3580 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
3593 _mov(T_eax, loOperand(Expected)); | 3581 _mov(T_eax, loOperand(Expected)); |
3594 _mov(T_edx, hiOperand(Expected)); | 3582 _mov(T_edx, hiOperand(Expected)); |
3595 _mov(T_ebx, loOperand(Desired)); | 3583 _mov(T_ebx, loOperand(Desired)); |
3596 _mov(T_ecx, hiOperand(Desired)); | 3584 _mov(T_ecx, hiOperand(Desired)); |
3597 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3585 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3598 constexpr bool Locked = true; | 3586 constexpr bool Locked = true; |
3599 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3587 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3600 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3588 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3601 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3589 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3602 _mov(DestLo, T_eax); | 3590 _mov(DestLo, T_eax); |
3603 _mov(DestHi, T_edx); | 3591 _mov(DestHi, T_edx); |
3604 return; | 3592 return; |
3605 } | 3593 } |
3606 int32_t Eax; | 3594 int32_t Eax; |
3607 switch (Ty) { | 3595 switch (Ty) { |
3608 default: | 3596 default: |
3609 llvm::report_fatal_error("Bad type for cmpxchg"); | 3597 llvm::report_fatal_error("Bad type for cmpxchg"); |
3610 case IceType_i64: | 3598 case IceType_i64: |
3611 Eax = Traits::getRaxOrDie(); | 3599 Eax = Traits::getRaxOrDie(); |
3612 break; | 3600 break; |
3613 case IceType_i32: | 3601 case IceType_i32: |
3614 Eax = Traits::RegisterSet::Reg_eax; | 3602 Eax = Traits::RegisterSet::Reg_eax; |
3615 break; | 3603 break; |
3616 case IceType_i16: | 3604 case IceType_i16: |
3617 Eax = Traits::RegisterSet::Reg_ax; | 3605 Eax = Traits::RegisterSet::Reg_ax; |
3618 break; | 3606 break; |
3619 case IceType_i8: | 3607 case IceType_i8: |
3620 Eax = Traits::RegisterSet::Reg_al; | 3608 Eax = Traits::RegisterSet::Reg_al; |
3621 break; | 3609 break; |
3622 } | 3610 } |
3623 Variable *T_eax = makeReg(Ty, Eax); | 3611 Variable *T_eax = makeReg(Ty, Eax); |
3624 _mov(T_eax, Expected); | 3612 _mov(T_eax, Expected); |
3625 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3613 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3626 Variable *DesiredReg = legalizeToReg(Desired); | 3614 Variable *DesiredReg = legalizeToReg(Desired); |
3627 constexpr bool Locked = true; | 3615 constexpr bool Locked = true; |
3628 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3616 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
3629 _mov(DestPrev, T_eax); | 3617 _mov(DestPrev, T_eax); |
3630 } | 3618 } |
3631 | 3619 |
3632 template <class Machine> | 3620 template <typename TraitsType> |
3633 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3621 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
3634 Operand *PtrToMem, | 3622 Operand *PtrToMem, |
3635 Operand *Expected, | 3623 Operand *Expected, |
3636 Operand *Desired) { | 3624 Operand *Desired) { |
3637 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3625 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
3638 return false; | 3626 return false; |
3639 // Peek ahead a few instructions and see how Dest is used. | 3627 // Peek ahead a few instructions and see how Dest is used. |
3640 // It's very common to have: | 3628 // It's very common to have: |
3641 // | 3629 // |
3642 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3630 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
3643 // [%y_phi = ...] // list of phi stores | 3631 // [%y_phi = ...] // list of phi stores |
3644 // %p = icmp eq i32 %x, %expected | 3632 // %p = icmp eq i32 %x, %expected |
3645 // br i1 %p, label %l1, label %l2 | 3633 // br i1 %p, label %l1, label %l2 |
3646 // | 3634 // |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3698 NextBr->setDeleted(); | 3686 NextBr->setDeleted(); |
3699 Context.advanceNext(); | 3687 Context.advanceNext(); |
3700 Context.advanceNext(); | 3688 Context.advanceNext(); |
3701 return true; | 3689 return true; |
3702 } | 3690 } |
3703 } | 3691 } |
3704 } | 3692 } |
3705 return false; | 3693 return false; |
3706 } | 3694 } |
3707 | 3695 |
3708 template <class Machine> | 3696 template <typename TraitsType> |
3709 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3697 void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest, |
3710 Operand *Ptr, Operand *Val) { | 3698 uint32_t Operation, Operand *Ptr, |
3699 Operand *Val) { | |
3711 bool NeedsCmpxchg = false; | 3700 bool NeedsCmpxchg = false; |
3712 LowerBinOp Op_Lo = nullptr; | 3701 LowerBinOp Op_Lo = nullptr; |
3713 LowerBinOp Op_Hi = nullptr; | 3702 LowerBinOp Op_Hi = nullptr; |
3714 switch (Operation) { | 3703 switch (Operation) { |
3715 default: | 3704 default: |
3716 Func->setError("Unknown AtomicRMW operation"); | 3705 Func->setError("Unknown AtomicRMW operation"); |
3717 return; | 3706 return; |
3718 case Intrinsics::AtomicAdd: { | 3707 case Intrinsics::AtomicAdd: { |
3719 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3708 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3720 // All the fall-through paths must set this to true, but use this | 3709 // All the fall-through paths must set this to true, but use this |
3721 // for asserting. | 3710 // for asserting. |
3722 NeedsCmpxchg = true; | 3711 NeedsCmpxchg = true; |
3723 Op_Lo = &TargetX86Base<Machine>::_add; | 3712 Op_Lo = &TargetX86Base<TraitsType>::_add; |
3724 Op_Hi = &TargetX86Base<Machine>::_adc; | 3713 Op_Hi = &TargetX86Base<TraitsType>::_adc; |
3725 break; | 3714 break; |
3726 } | 3715 } |
3727 typename Traits::X86OperandMem *Addr = | 3716 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
3728 formMemoryOperand(Ptr, Dest->getType()); | |
3729 constexpr bool Locked = true; | 3717 constexpr bool Locked = true; |
3730 Variable *T = nullptr; | 3718 Variable *T = nullptr; |
3731 _mov(T, Val); | 3719 _mov(T, Val); |
3732 _xadd(Addr, T, Locked); | 3720 _xadd(Addr, T, Locked); |
3733 _mov(Dest, T); | 3721 _mov(Dest, T); |
3734 return; | 3722 return; |
3735 } | 3723 } |
3736 case Intrinsics::AtomicSub: { | 3724 case Intrinsics::AtomicSub: { |
3737 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3725 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3738 NeedsCmpxchg = true; | 3726 NeedsCmpxchg = true; |
3739 Op_Lo = &TargetX86Base<Machine>::_sub; | 3727 Op_Lo = &TargetX86Base<TraitsType>::_sub; |
3740 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3728 Op_Hi = &TargetX86Base<TraitsType>::_sbb; |
3741 break; | 3729 break; |
3742 } | 3730 } |
3743 typename Traits::X86OperandMem *Addr = | 3731 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
3744 formMemoryOperand(Ptr, Dest->getType()); | |
3745 constexpr bool Locked = true; | 3732 constexpr bool Locked = true; |
3746 Variable *T = nullptr; | 3733 Variable *T = nullptr; |
3747 _mov(T, Val); | 3734 _mov(T, Val); |
3748 _neg(T); | 3735 _neg(T); |
3749 _xadd(Addr, T, Locked); | 3736 _xadd(Addr, T, Locked); |
3750 _mov(Dest, T); | 3737 _mov(Dest, T); |
3751 return; | 3738 return; |
3752 } | 3739 } |
3753 case Intrinsics::AtomicOr: | 3740 case Intrinsics::AtomicOr: |
3754 // TODO(jvoung): If Dest is null or dead, then some of these | 3741 // TODO(jvoung): If Dest is null or dead, then some of these |
3755 // operations do not need an "exchange", but just a locked op. | 3742 // operations do not need an "exchange", but just a locked op. |
3756 // That appears to be "worth" it for sub, or, and, and xor. | 3743 // That appears to be "worth" it for sub, or, and, and xor. |
3757 // xadd is probably fine vs lock add for add, and xchg is fine | 3744 // xadd is probably fine vs lock add for add, and xchg is fine |
3758 // vs an atomic store. | 3745 // vs an atomic store. |
3759 NeedsCmpxchg = true; | 3746 NeedsCmpxchg = true; |
3760 Op_Lo = &TargetX86Base<Machine>::_or; | 3747 Op_Lo = &TargetX86Base<TraitsType>::_or; |
3761 Op_Hi = &TargetX86Base<Machine>::_or; | 3748 Op_Hi = &TargetX86Base<TraitsType>::_or; |
3762 break; | 3749 break; |
3763 case Intrinsics::AtomicAnd: | 3750 case Intrinsics::AtomicAnd: |
3764 NeedsCmpxchg = true; | 3751 NeedsCmpxchg = true; |
3765 Op_Lo = &TargetX86Base<Machine>::_and; | 3752 Op_Lo = &TargetX86Base<TraitsType>::_and; |
3766 Op_Hi = &TargetX86Base<Machine>::_and; | 3753 Op_Hi = &TargetX86Base<TraitsType>::_and; |
3767 break; | 3754 break; |
3768 case Intrinsics::AtomicXor: | 3755 case Intrinsics::AtomicXor: |
3769 NeedsCmpxchg = true; | 3756 NeedsCmpxchg = true; |
3770 Op_Lo = &TargetX86Base<Machine>::_xor; | 3757 Op_Lo = &TargetX86Base<TraitsType>::_xor; |
3771 Op_Hi = &TargetX86Base<Machine>::_xor; | 3758 Op_Hi = &TargetX86Base<TraitsType>::_xor; |
3772 break; | 3759 break; |
3773 case Intrinsics::AtomicExchange: | 3760 case Intrinsics::AtomicExchange: |
3774 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3761 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3775 NeedsCmpxchg = true; | 3762 NeedsCmpxchg = true; |
3776 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3763 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
3777 // just need to be moved to the ecx and ebx registers. | 3764 // just need to be moved to the ecx and ebx registers. |
3778 Op_Lo = nullptr; | 3765 Op_Lo = nullptr; |
3779 Op_Hi = nullptr; | 3766 Op_Hi = nullptr; |
3780 break; | 3767 break; |
3781 } | 3768 } |
3782 typename Traits::X86OperandMem *Addr = | 3769 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
3783 formMemoryOperand(Ptr, Dest->getType()); | |
3784 Variable *T = nullptr; | 3770 Variable *T = nullptr; |
3785 _mov(T, Val); | 3771 _mov(T, Val); |
3786 _xchg(Addr, T); | 3772 _xchg(Addr, T); |
3787 _mov(Dest, T); | 3773 _mov(Dest, T); |
3788 return; | 3774 return; |
3789 } | 3775 } |
3790 // Otherwise, we need a cmpxchg loop. | 3776 // Otherwise, we need a cmpxchg loop. |
3791 (void)NeedsCmpxchg; | 3777 (void)NeedsCmpxchg; |
3792 assert(NeedsCmpxchg); | 3778 assert(NeedsCmpxchg); |
3793 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | 3779 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
3794 } | 3780 } |
3795 | 3781 |
3796 template <class Machine> | 3782 template <typename TraitsType> |
3797 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, | 3783 void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, |
3798 LowerBinOp Op_Hi, | 3784 LowerBinOp Op_Hi, |
3799 Variable *Dest, | 3785 Variable *Dest, |
3800 Operand *Ptr, | 3786 Operand *Ptr, |
3801 Operand *Val) { | 3787 Operand *Val) { |
3802 // Expand a more complex RMW operation as a cmpxchg loop: | 3788 // Expand a more complex RMW operation as a cmpxchg loop: |
3803 // For 64-bit: | 3789 // For 64-bit: |
3804 // mov eax, [ptr] | 3790 // mov eax, [ptr] |
3805 // mov edx, [ptr + 4] | 3791 // mov edx, [ptr + 4] |
3806 // .LABEL: | 3792 // .LABEL: |
3807 // mov ebx, eax | 3793 // mov ebx, eax |
3808 // <Op_Lo> ebx, <desired_adj_lo> | 3794 // <Op_Lo> ebx, <desired_adj_lo> |
3809 // mov ecx, edx | 3795 // mov ecx, edx |
3810 // <Op_Hi> ecx, <desired_adj_hi> | 3796 // <Op_Hi> ecx, <desired_adj_hi> |
3811 // lock cmpxchg8b [ptr] | 3797 // lock cmpxchg8b [ptr] |
3812 // jne .LABEL | 3798 // jne .LABEL |
3813 // mov <dest_lo>, eax | 3799 // mov <dest_lo>, eax |
3814 // mov <dest_lo>, edx | 3800 // mov <dest_lo>, edx |
3815 // | 3801 // |
3816 // For 32-bit: | 3802 // For 32-bit: |
3817 // mov eax, [ptr] | 3803 // mov eax, [ptr] |
3818 // .LABEL: | 3804 // .LABEL: |
3819 // mov <reg>, eax | 3805 // mov <reg>, eax |
3820 // op <reg>, [desired_adj] | 3806 // op <reg>, [desired_adj] |
3821 // lock cmpxchg [ptr], <reg> | 3807 // lock cmpxchg [ptr], <reg> |
3822 // jne .LABEL | 3808 // jne .LABEL |
3823 // mov <dest>, eax | 3809 // mov <dest>, eax |
3824 // | 3810 // |
3825 // If Op_{Lo,Hi} are nullptr, then just copy the value. | 3811 // If Op_{Lo,Hi} are nullptr, then just copy the value. |
3826 Val = legalize(Val); | 3812 Val = legalize(Val); |
3827 Type Ty = Val->getType(); | 3813 Type Ty = Val->getType(); |
3828 if (!Traits::Is64Bit && Ty == IceType_i64) { | 3814 if (!Traits::Is64Bit && Ty == IceType_i64) { |
3829 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3815 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
3830 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3816 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
3831 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3817 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3832 _mov(T_eax, loOperand(Addr)); | 3818 _mov(T_eax, loOperand(Addr)); |
3833 _mov(T_edx, hiOperand(Addr)); | 3819 _mov(T_edx, hiOperand(Addr)); |
3834 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3820 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
3835 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3821 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
3836 typename Traits::Insts::Label *Label = | 3822 InstX86Label *Label = InstX86Label::create(Func, this); |
3837 Traits::Insts::Label::create(Func, this); | |
3838 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | 3823 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; |
3839 if (!IsXchg8b) { | 3824 if (!IsXchg8b) { |
3840 Context.insert(Label); | 3825 Context.insert(Label); |
3841 _mov(T_ebx, T_eax); | 3826 _mov(T_ebx, T_eax); |
3842 (this->*Op_Lo)(T_ebx, loOperand(Val)); | 3827 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
3843 _mov(T_ecx, T_edx); | 3828 _mov(T_ecx, T_edx); |
3844 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3829 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
3845 } else { | 3830 } else { |
3846 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3831 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
3847 // It just needs the Val loaded into ebx and ecx. | 3832 // It just needs the Val loaded into ebx and ecx. |
(...skipping 21 matching lines...) Expand all Loading... | |
3869 } | 3854 } |
3870 // The address base (if any) is also reused in the loop. | 3855 // The address base (if any) is also reused in the loop. |
3871 if (Variable *Base = Addr->getBase()) | 3856 if (Variable *Base = Addr->getBase()) |
3872 Context.insert<InstFakeUse>(Base); | 3857 Context.insert<InstFakeUse>(Base); |
3873 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3858 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3874 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3859 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3875 _mov(DestLo, T_eax); | 3860 _mov(DestLo, T_eax); |
3876 _mov(DestHi, T_edx); | 3861 _mov(DestHi, T_edx); |
3877 return; | 3862 return; |
3878 } | 3863 } |
3879 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3864 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3880 int32_t Eax; | 3865 int32_t Eax; |
3881 switch (Ty) { | 3866 switch (Ty) { |
3882 default: | 3867 default: |
3883 llvm::report_fatal_error("Bad type for atomicRMW"); | 3868 llvm::report_fatal_error("Bad type for atomicRMW"); |
3884 case IceType_i64: | 3869 case IceType_i64: |
3885 Eax = Traits::getRaxOrDie(); | 3870 Eax = Traits::getRaxOrDie(); |
3886 break; | 3871 break; |
3887 case IceType_i32: | 3872 case IceType_i32: |
3888 Eax = Traits::RegisterSet::Reg_eax; | 3873 Eax = Traits::RegisterSet::Reg_eax; |
3889 break; | 3874 break; |
3890 case IceType_i16: | 3875 case IceType_i16: |
3891 Eax = Traits::RegisterSet::Reg_ax; | 3876 Eax = Traits::RegisterSet::Reg_ax; |
3892 break; | 3877 break; |
3893 case IceType_i8: | 3878 case IceType_i8: |
3894 Eax = Traits::RegisterSet::Reg_al; | 3879 Eax = Traits::RegisterSet::Reg_al; |
3895 break; | 3880 break; |
3896 } | 3881 } |
3897 Variable *T_eax = makeReg(Ty, Eax); | 3882 Variable *T_eax = makeReg(Ty, Eax); |
3898 _mov(T_eax, Addr); | 3883 _mov(T_eax, Addr); |
3899 auto *Label = Context.insert<typename Traits::Insts::Label>(this); | 3884 auto *Label = Context.insert<InstX86Label>(this); |
3900 // We want to pick a different register for T than Eax, so don't use | 3885 // We want to pick a different register for T than Eax, so don't use |
3901 // _mov(T == nullptr, T_eax). | 3886 // _mov(T == nullptr, T_eax). |
3902 Variable *T = makeReg(Ty); | 3887 Variable *T = makeReg(Ty); |
3903 _mov(T, T_eax); | 3888 _mov(T, T_eax); |
3904 (this->*Op_Lo)(T, Val); | 3889 (this->*Op_Lo)(T, Val); |
3905 constexpr bool Locked = true; | 3890 constexpr bool Locked = true; |
3906 _cmpxchg(Addr, T_eax, T, Locked); | 3891 _cmpxchg(Addr, T_eax, T, Locked); |
3907 _br(Traits::Cond::Br_ne, Label); | 3892 _br(Traits::Cond::Br_ne, Label); |
3908 // If Val is a variable, model the extended live range of Val through | 3893 // If Val is a variable, model the extended live range of Val through |
3909 // the end of the loop, since it will be re-used by the loop. | 3894 // the end of the loop, since it will be re-used by the loop. |
3910 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3895 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3911 Context.insert<InstFakeUse>(ValVar); | 3896 Context.insert<InstFakeUse>(ValVar); |
3912 } | 3897 } |
3913 // The address base (if any) is also reused in the loop. | 3898 // The address base (if any) is also reused in the loop. |
3914 if (Variable *Base = Addr->getBase()) | 3899 if (Variable *Base = Addr->getBase()) |
3915 Context.insert<InstFakeUse>(Base); | 3900 Context.insert<InstFakeUse>(Base); |
3916 _mov(Dest, T_eax); | 3901 _mov(Dest, T_eax); |
3917 } | 3902 } |
3918 | 3903 |
3919 /// Lowers count {trailing, leading} zeros intrinsic. | 3904 /// Lowers count {trailing, leading} zeros intrinsic. |
3920 /// | 3905 /// |
3921 /// We could do constant folding here, but that should have | 3906 /// We could do constant folding here, but that should have |
3922 /// been done by the front-end/middle-end optimizations. | 3907 /// been done by the front-end/middle-end optimizations. |
3923 template <class Machine> | 3908 template <typename TraitsType> |
3924 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3909 void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty, |
3925 Operand *FirstVal, | 3910 Variable *Dest, |
3926 Operand *SecondVal) { | 3911 Operand *FirstVal, |
3912 Operand *SecondVal) { | |
3927 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3913 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
3928 // Then the instructions will handle the Val == 0 case much more simply | 3914 // Then the instructions will handle the Val == 0 case much more simply |
3929 // and won't require conversion from bit position to number of zeros. | 3915 // and won't require conversion from bit position to number of zeros. |
3930 // | 3916 // |
3931 // Otherwise: | 3917 // Otherwise: |
3932 // bsr IF_NOT_ZERO, Val | 3918 // bsr IF_NOT_ZERO, Val |
3933 // mov T_DEST, 63 | 3919 // mov T_DEST, 63 |
3934 // cmovne T_DEST, IF_NOT_ZERO | 3920 // cmovne T_DEST, IF_NOT_ZERO |
3935 // xor T_DEST, 31 | 3921 // xor T_DEST, 31 |
3936 // mov DEST, T_DEST | 3922 // mov DEST, T_DEST |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3988 } else { | 3974 } else { |
3989 _bsr(T_Dest2, SecondVar); | 3975 _bsr(T_Dest2, SecondVar); |
3990 _xor(T_Dest2, _31); | 3976 _xor(T_Dest2, _31); |
3991 } | 3977 } |
3992 _test(SecondVar, SecondVar); | 3978 _test(SecondVar, SecondVar); |
3993 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3979 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
3994 _mov(DestLo, T_Dest2); | 3980 _mov(DestLo, T_Dest2); |
3995 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3981 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
3996 } | 3982 } |
3997 | 3983 |
3998 template <class Machine> | 3984 template <typename TraitsType> |
3999 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, | 3985 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest, |
4000 Constant *Offset) { | 3986 Variable *Base, Constant *Offset) { |
4001 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 3987 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
4002 | 3988 |
4003 if (isVectorType(Ty)) | 3989 if (isVectorType(Ty)) |
4004 _movp(Dest, Mem); | 3990 _movp(Dest, Mem); |
4005 else if (Ty == IceType_f64) | 3991 else if (Ty == IceType_f64) |
4006 _movq(Dest, Mem); | 3992 _movq(Dest, Mem); |
4007 else | 3993 else |
4008 _mov(Dest, Mem); | 3994 _mov(Dest, Mem); |
4009 } | 3995 } |
4010 | 3996 |
4011 template <class Machine> | 3997 template <typename TraitsType> |
4012 void TargetX86Base<Machine>::typedStore(Type Ty, Variable *Value, | 3998 void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value, |
4013 Variable *Base, Constant *Offset) { | 3999 Variable *Base, Constant *Offset) { |
4014 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4000 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
4015 | 4001 |
4016 if (isVectorType(Ty)) | 4002 if (isVectorType(Ty)) |
4017 _storep(Value, Mem); | 4003 _storep(Value, Mem); |
4018 else if (Ty == IceType_f64) | 4004 else if (Ty == IceType_f64) |
4019 _storeq(Value, Mem); | 4005 _storeq(Value, Mem); |
4020 else | 4006 else |
4021 _store(Value, Mem); | 4007 _store(Value, Mem); |
4022 } | 4008 } |
4023 | 4009 |
4024 template <class Machine> | 4010 template <typename TraitsType> |
4025 void TargetX86Base<Machine>::copyMemory(Type Ty, Variable *Dest, Variable *Src, | 4011 void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest, |
4026 int32_t OffsetAmt) { | 4012 Variable *Src, int32_t OffsetAmt) { |
4027 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; | 4013 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; |
4028 // TODO(ascull): this or add nullptr test to _movp, _movq | 4014 // TODO(ascull): this or add nullptr test to _movp, _movq |
4029 Variable *Data = makeReg(Ty); | 4015 Variable *Data = makeReg(Ty); |
4030 | 4016 |
4031 typedLoad(Ty, Data, Src, Offset); | 4017 typedLoad(Ty, Data, Src, Offset); |
4032 typedStore(Ty, Data, Dest, Offset); | 4018 typedStore(Ty, Data, Dest, Offset); |
4033 } | 4019 } |
4034 | 4020 |
4035 template <class Machine> | 4021 template <typename TraitsType> |
4036 void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src, | 4022 void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src, |
4037 Operand *Count) { | 4023 Operand *Count) { |
4038 // There is a load and store for each chunk in the unroll | 4024 // There is a load and store for each chunk in the unroll |
4039 constexpr uint32_t BytesPerStorep = 16; | 4025 constexpr uint32_t BytesPerStorep = 16; |
4040 | 4026 |
4041 // Check if the operands are constants | 4027 // Check if the operands are constants |
4042 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4028 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
4043 const bool IsCountConst = CountConst != nullptr; | 4029 const bool IsCountConst = CountConst != nullptr; |
4044 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; | 4030 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
4045 | 4031 |
4046 if (shouldOptimizeMemIntrins() && IsCountConst && | 4032 if (shouldOptimizeMemIntrins() && IsCountConst && |
4047 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) { | 4033 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) { |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4079 } | 4065 } |
4080 | 4066 |
4081 // Fall back on a function call | 4067 // Fall back on a function call |
4082 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); | 4068 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); |
4083 Call->addArg(Dest); | 4069 Call->addArg(Dest); |
4084 Call->addArg(Src); | 4070 Call->addArg(Src); |
4085 Call->addArg(Count); | 4071 Call->addArg(Count); |
4086 lowerCall(Call); | 4072 lowerCall(Call); |
4087 } | 4073 } |
4088 | 4074 |
4089 template <class Machine> | 4075 template <typename TraitsType> |
4090 void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src, | 4076 void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src, |
4091 Operand *Count) { | 4077 Operand *Count) { |
4092 // There is a load and store for each chunk in the unroll | 4078 // There is a load and store for each chunk in the unroll |
4093 constexpr uint32_t BytesPerStorep = 16; | 4079 constexpr uint32_t BytesPerStorep = 16; |
4094 | 4080 |
4095 // Check if the operands are constants | 4081 // Check if the operands are constants |
4096 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4082 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
4097 const bool IsCountConst = CountConst != nullptr; | 4083 const bool IsCountConst = CountConst != nullptr; |
4098 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; | 4084 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
4099 | 4085 |
4100 if (shouldOptimizeMemIntrins() && IsCountConst && | 4086 if (shouldOptimizeMemIntrins() && IsCountConst && |
4101 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) { | 4087 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) { |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4151 } | 4137 } |
4152 | 4138 |
4153 // Fall back on a function call | 4139 // Fall back on a function call |
4154 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | 4140 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); |
4155 Call->addArg(Dest); | 4141 Call->addArg(Dest); |
4156 Call->addArg(Src); | 4142 Call->addArg(Src); |
4157 Call->addArg(Count); | 4143 Call->addArg(Count); |
4158 lowerCall(Call); | 4144 lowerCall(Call); |
4159 } | 4145 } |
4160 | 4146 |
4161 template <class Machine> | 4147 template <typename TraitsType> |
4162 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, | 4148 void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val, |
4163 Operand *Count) { | 4149 Operand *Count) { |
4164 constexpr uint32_t BytesPerStorep = 16; | 4150 constexpr uint32_t BytesPerStorep = 16; |
4165 constexpr uint32_t BytesPerStoreq = 8; | 4151 constexpr uint32_t BytesPerStoreq = 8; |
4166 constexpr uint32_t BytesPerStorei32 = 4; | 4152 constexpr uint32_t BytesPerStorei32 = 4; |
4167 assert(Val->getType() == IceType_i8); | 4153 assert(Val->getType() == IceType_i8); |
4168 | 4154 |
4169 // Check if the operands are constants | 4155 // Check if the operands are constants |
4170 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | 4156 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
4171 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); | 4157 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); |
4172 const bool IsCountConst = CountConst != nullptr; | 4158 const bool IsCountConst = CountConst != nullptr; |
4173 const bool IsValConst = ValConst != nullptr; | 4159 const bool IsValConst = ValConst != nullptr; |
(...skipping 12 matching lines...) Expand all Loading... | |
4186 Variable *VecReg = nullptr; | 4172 Variable *VecReg = nullptr; |
4187 const uint32_t SpreadValue = | 4173 const uint32_t SpreadValue = |
4188 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; | 4174 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; |
4189 | 4175 |
4190 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty, | 4176 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty, |
4191 uint32_t OffsetAmt) { | 4177 uint32_t OffsetAmt) { |
4192 assert(Base != nullptr); | 4178 assert(Base != nullptr); |
4193 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; | 4179 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; |
4194 | 4180 |
4195 // TODO(ascull): is 64-bit better with vector or scalar movq? | 4181 // TODO(ascull): is 64-bit better with vector or scalar movq? |
4196 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4182 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
4197 if (isVectorType(Ty)) { | 4183 if (isVectorType(Ty)) { |
4198 assert(VecReg != nullptr); | 4184 assert(VecReg != nullptr); |
4199 _storep(VecReg, Mem); | 4185 _storep(VecReg, Mem); |
4200 } else if (Ty == IceType_f64) { | 4186 } else if (Ty == IceType_f64) { |
4201 assert(VecReg != nullptr); | 4187 assert(VecReg != nullptr); |
4202 _storeq(VecReg, Mem); | 4188 _storeq(VecReg, Mem); |
4203 } else { | 4189 } else { |
4204 assert(Ty != IceType_i64); | 4190 assert(Ty != IceType_i64); |
4205 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); | 4191 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
4206 } | 4192 } |
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4260 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); | 4246 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); |
4261 ValExt = ValExtVar; | 4247 ValExt = ValExtVar; |
4262 } | 4248 } |
4263 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | 4249 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
4264 Call->addArg(Dest); | 4250 Call->addArg(Dest); |
4265 Call->addArg(ValExt); | 4251 Call->addArg(ValExt); |
4266 Call->addArg(Count); | 4252 Call->addArg(Count); |
4267 lowerCall(Call); | 4253 lowerCall(Call); |
4268 } | 4254 } |
4269 | 4255 |
4270 template <class Machine> | 4256 template <typename TraitsType> |
4271 void TargetX86Base<Machine>::lowerIndirectJump(Variable *JumpTarget) { | 4257 void TargetX86Base<TraitsType>::lowerIndirectJump(Variable *JumpTarget) { |
4272 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 4258 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
4273 if (Traits::Is64Bit) { | 4259 if (Traits::Is64Bit) { |
4274 Variable *T = makeReg(IceType_i64); | 4260 Variable *T = makeReg(IceType_i64); |
4275 _movzx(T, JumpTarget); | 4261 _movzx(T, JumpTarget); |
4276 JumpTarget = T; | 4262 JumpTarget = T; |
4277 } | 4263 } |
4278 if (NeedSandboxing) { | 4264 if (NeedSandboxing) { |
4279 _bundle_lock(); | 4265 _bundle_lock(); |
4280 const SizeT BundleSize = | 4266 const SizeT BundleSize = |
4281 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 4267 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
(...skipping 376 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4658 /// For the purpose of mocking the bounds check, we'll do something like this: | 4644 /// For the purpose of mocking the bounds check, we'll do something like this: |
4659 /// | 4645 /// |
4660 /// cmp reg, 0 | 4646 /// cmp reg, 0 |
4661 /// je label | 4647 /// je label |
4662 /// cmp reg, 1 | 4648 /// cmp reg, 1 |
4663 /// je label | 4649 /// je label |
4664 /// label: | 4650 /// label: |
4665 /// | 4651 /// |
4666 /// Also note that we don't need to add a bounds check to a dereference of a | 4652 /// Also note that we don't need to add a bounds check to a dereference of a |
4667 /// simple global variable address. | 4653 /// simple global variable address. |
4668 template <class Machine> | 4654 template <typename TraitsType> |
4669 void TargetX86Base<Machine>::doMockBoundsCheck(Operand *Opnd) { | 4655 void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) { |
4670 if (!Ctx->getFlags().getMockBoundsCheck()) | 4656 if (!Ctx->getFlags().getMockBoundsCheck()) |
4671 return; | 4657 return; |
4672 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd)) { | 4658 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) { |
4673 if (Mem->getIndex()) { | 4659 if (Mem->getIndex()) { |
4674 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg"); | 4660 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg"); |
4675 } | 4661 } |
4676 Opnd = Mem->getBase(); | 4662 Opnd = Mem->getBase(); |
4677 } | 4663 } |
4678 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps | 4664 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps |
4679 // something else. We only care if it is Variable. | 4665 // something else. We only care if it is Variable. |
4680 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); | 4666 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); |
4681 if (Var == nullptr) | 4667 if (Var == nullptr) |
4682 return; | 4668 return; |
4683 // We use lowerStore() to copy out-args onto the stack. This creates a memory | 4669 // We use lowerStore() to copy out-args onto the stack. This creates a memory |
4684 // operand with the stack pointer as the base register. Don't do bounds | 4670 // operand with the stack pointer as the base register. Don't do bounds |
4685 // checks on that. | 4671 // checks on that. |
4686 if (Var->getRegNum() == static_cast<int32_t>(getStackReg())) | 4672 if (Var->getRegNum() == static_cast<int32_t>(getStackReg())) |
4687 return; | 4673 return; |
4688 | 4674 |
4689 auto *Label = Traits::Insts::Label::create(Func, this); | 4675 auto *Label = InstX86Label::create(Func, this); |
4690 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); | 4676 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); |
4691 _br(Traits::Cond::Br_e, Label); | 4677 _br(Traits::Cond::Br_e, Label); |
4692 _cmp(Opnd, Ctx->getConstantInt32(1)); | 4678 _cmp(Opnd, Ctx->getConstantInt32(1)); |
4693 _br(Traits::Cond::Br_e, Label); | 4679 _br(Traits::Cond::Br_e, Label); |
4694 Context.insert(Label); | 4680 Context.insert(Label); |
4695 } | 4681 } |
4696 | 4682 |
4697 template <class Machine> | 4683 template <typename TraitsType> |
4698 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { | 4684 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) { |
4699 // A Load instruction can be treated the same as an Assign instruction, after | 4685 // A Load instruction can be treated the same as an Assign instruction, after |
4700 // the source operand is transformed into an Traits::X86OperandMem operand. | 4686 // the source operand is transformed into an X86OperandMem operand. |
4701 // Note that the address mode optimization already creates an | 4687 // Note that the address mode optimization already creates an |
4702 // Traits::X86OperandMem operand, so it doesn't need another level of | 4688 // X86OperandMem operand, so it doesn't need another level of |
4703 // transformation. | 4689 // transformation. |
4704 Variable *DestLoad = Load->getDest(); | 4690 Variable *DestLoad = Load->getDest(); |
4705 Type Ty = DestLoad->getType(); | 4691 Type Ty = DestLoad->getType(); |
4706 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 4692 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
4707 doMockBoundsCheck(Src0); | 4693 doMockBoundsCheck(Src0); |
4708 auto *Assign = InstAssign::create(Func, DestLoad, Src0); | 4694 auto *Assign = InstAssign::create(Func, DestLoad, Src0); |
4709 lowerAssign(Assign); | 4695 lowerAssign(Assign); |
4710 } | 4696 } |
4711 | 4697 |
4712 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { | 4698 template <typename TraitsType> |
4699 void TargetX86Base<TraitsType>::doAddressOptLoad() { | |
4713 Inst *Inst = Context.getCur(); | 4700 Inst *Inst = Context.getCur(); |
4714 Variable *Dest = Inst->getDest(); | 4701 Variable *Dest = Inst->getDest(); |
4715 Operand *Addr = Inst->getSrc(0); | 4702 Operand *Addr = Inst->getSrc(0); |
4716 Variable *Index = nullptr; | 4703 Variable *Index = nullptr; |
4717 ConstantRelocatable *Relocatable = nullptr; | 4704 ConstantRelocatable *Relocatable = nullptr; |
4718 uint16_t Shift = 0; | 4705 uint16_t Shift = 0; |
4719 int32_t Offset = 0; | 4706 int32_t Offset = 0; |
4720 // Vanilla ICE load instructions should not use the segment registers, and | 4707 // Vanilla ICE load instructions should not use the segment registers, and |
4721 // computeAddressOpt only works at the level of Variables and Constants, not | 4708 // computeAddressOpt only works at the level of Variables and Constants, not |
4722 // other Traits::X86OperandMem, so there should be no mention of segment | 4709 // other X86OperandMem, so there should be no mention of segment |
4723 // registers there either. | 4710 // registers there either. |
4724 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = | 4711 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment; |
4725 Traits::X86OperandMem::DefaultSegment; | |
4726 auto *Base = llvm::dyn_cast<Variable>(Addr); | 4712 auto *Base = llvm::dyn_cast<Variable>(Addr); |
4727 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { | 4713 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
4728 Inst->setDeleted(); | 4714 Inst->setDeleted(); |
4729 Constant *OffsetOp = nullptr; | 4715 Constant *OffsetOp = nullptr; |
4730 if (Relocatable == nullptr) { | 4716 if (Relocatable == nullptr) { |
4731 OffsetOp = Ctx->getConstantInt32(Offset); | 4717 OffsetOp = Ctx->getConstantInt32(Offset); |
4732 } else { | 4718 } else { |
4733 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 4719 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
4734 Relocatable->getName(), | 4720 Relocatable->getName(), |
4735 Relocatable->getSuppressMangling()); | 4721 Relocatable->getSuppressMangling()); |
4736 } | 4722 } |
4737 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, | 4723 Addr = X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
4738 Index, Shift, SegmentReg); | 4724 Shift, SegmentReg); |
4739 Context.insert<InstLoad>(Dest, Addr); | 4725 Context.insert<InstLoad>(Dest, Addr); |
4740 } | 4726 } |
4741 } | 4727 } |
4742 | 4728 |
4743 template <class Machine> | 4729 template <typename TraitsType> |
4744 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, | 4730 void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability, |
4745 RandomNumberGenerator &RNG) { | 4731 RandomNumberGenerator &RNG) { |
4746 RandomNumberGeneratorWrapper RNGW(RNG); | 4732 RandomNumberGeneratorWrapper RNGW(RNG); |
4747 if (RNGW.getTrueWithProbability(Probability)) { | 4733 if (RNGW.getTrueWithProbability(Probability)) { |
4748 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 4734 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
4749 } | 4735 } |
4750 } | 4736 } |
4751 | 4737 |
4752 template <class Machine> | 4738 template <typename TraitsType> |
4753 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | 4739 void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) { |
4754 Func->setError("Phi found in regular instruction list"); | 4740 Func->setError("Phi found in regular instruction list"); |
4755 } | 4741 } |
4756 | 4742 |
4757 template <class Machine> | 4743 template <typename TraitsType> |
4758 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) { | 4744 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { |
4759 Variable *Dest = Select->getDest(); | 4745 Variable *Dest = Select->getDest(); |
4760 | 4746 |
4761 if (isVectorType(Dest->getType())) { | 4747 if (isVectorType(Dest->getType())) { |
4762 lowerSelectVector(Select); | 4748 lowerSelectVector(Select); |
4763 return; | 4749 return; |
4764 } | 4750 } |
4765 | 4751 |
4766 Operand *Condition = Select->getCondition(); | 4752 Operand *Condition = Select->getCondition(); |
4767 // Handle folding opportunities. | 4753 // Handle folding opportunities. |
4768 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4754 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
(...skipping 11 matching lines...) Expand all Loading... | |
4780 return; | 4766 return; |
4781 } | 4767 } |
4782 } | 4768 } |
4783 } | 4769 } |
4784 | 4770 |
4785 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); | 4771 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
4786 Operand *Zero = Ctx->getConstantZero(IceType_i32); | 4772 Operand *Zero = Ctx->getConstantZero(IceType_i32); |
4787 _cmp(CmpResult, Zero); | 4773 _cmp(CmpResult, Zero); |
4788 Operand *SrcT = Select->getTrueOperand(); | 4774 Operand *SrcT = Select->getTrueOperand(); |
4789 Operand *SrcF = Select->getFalseOperand(); | 4775 Operand *SrcF = Select->getFalseOperand(); |
4790 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; | 4776 const BrCond Cond = Traits::Cond::Br_ne; |
4791 lowerSelectMove(Dest, Cond, SrcT, SrcF); | 4777 lowerSelectMove(Dest, Cond, SrcT, SrcF); |
4792 } | 4778 } |
4793 | 4779 |
4794 template <class Machine> | 4780 template <typename TraitsType> |
4795 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, | 4781 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond, |
4796 typename Traits::Cond::BrCond Cond, | 4782 Operand *SrcT, Operand *SrcF) { |
4797 Operand *SrcT, Operand *SrcF) { | |
4798 Type DestTy = Dest->getType(); | 4783 Type DestTy = Dest->getType(); |
4799 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4784 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
4800 // The cmov instruction doesn't allow 8-bit or FP operands, so we need | 4785 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
4801 // explicit control flow. | 4786 // explicit control flow. |
4802 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4787 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
4803 auto *Label = Traits::Insts::Label::create(Func, this); | 4788 auto *Label = InstX86Label::create(Func, this); |
4804 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4789 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
4805 _mov(Dest, SrcT); | 4790 _mov(Dest, SrcT); |
4806 _br(Cond, Label); | 4791 _br(Cond, Label); |
4807 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4792 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
4808 _redefined(_mov(Dest, SrcF)); | 4793 _redefined(_mov(Dest, SrcF)); |
4809 Context.insert(Label); | 4794 Context.insert(Label); |
4810 return; | 4795 return; |
4811 } | 4796 } |
4812 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4797 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
4813 // But if SrcT is immediate, we might be able to do better, as the cmov | 4798 // But if SrcT is immediate, we might be able to do better, as the cmov |
4814 // instruction doesn't allow an immediate operand: | 4799 // instruction doesn't allow an immediate operand: |
4815 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4800 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
4816 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4801 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
4817 std::swap(SrcT, SrcF); | 4802 std::swap(SrcT, SrcF); |
4818 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4803 Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond); |
4819 } | 4804 } |
4820 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 4805 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
4821 SrcT = legalizeUndef(SrcT); | 4806 SrcT = legalizeUndef(SrcT); |
4822 SrcF = legalizeUndef(SrcF); | 4807 SrcF = legalizeUndef(SrcF); |
4823 // Set the low portion. | 4808 // Set the low portion. |
4824 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4809 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
4825 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); | 4810 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); |
4826 // Set the high portion. | 4811 // Set the high portion. |
4827 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4812 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
4828 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); | 4813 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); |
4829 return; | 4814 return; |
4830 } | 4815 } |
4831 | 4816 |
4832 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || | 4817 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
4833 (Traits::Is64Bit && DestTy == IceType_i64)); | 4818 (Traits::Is64Bit && DestTy == IceType_i64)); |
4834 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); | 4819 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); |
4835 } | 4820 } |
4836 | 4821 |
4837 template <class Machine> | 4822 template <typename TraitsType> |
4838 void TargetX86Base<Machine>::lowerSelectIntMove( | 4823 void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond, |
4839 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT, | 4824 Operand *SrcT, |
4840 Operand *SrcF) { | 4825 Operand *SrcF) { |
4841 Variable *T = nullptr; | 4826 Variable *T = nullptr; |
4842 SrcF = legalize(SrcF); | 4827 SrcF = legalize(SrcF); |
4843 _mov(T, SrcF); | 4828 _mov(T, SrcF); |
4844 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4829 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
4845 _cmov(T, SrcT, Cond); | 4830 _cmov(T, SrcT, Cond); |
4846 _mov(Dest, T); | 4831 _mov(Dest, T); |
4847 } | 4832 } |
4848 | 4833 |
4849 template <class Machine> | 4834 template <typename TraitsType> |
4850 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src, | 4835 void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src, |
4851 bool IsRedefinition) { | 4836 bool IsRedefinition) { |
4852 assert(Dest->getType() == Src->getType()); | 4837 assert(Dest->getType() == Src->getType()); |
4853 assert(!Dest->isRematerializable()); | 4838 assert(!Dest->isRematerializable()); |
4854 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 4839 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
4855 Src = legalize(Src); | 4840 Src = legalize(Src); |
4856 Operand *SrcLo = loOperand(Src); | 4841 Operand *SrcLo = loOperand(Src); |
4857 Operand *SrcHi = hiOperand(Src); | 4842 Operand *SrcHi = hiOperand(Src); |
4858 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4843 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
4859 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4844 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
4860 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 4845 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
4861 _mov(T_Lo, SrcLo); | 4846 _mov(T_Lo, SrcLo); |
(...skipping 13 matching lines...) Expand all Loading... | |
4875 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); | 4860 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); |
4876 } | 4861 } |
4877 if (isVectorType(Dest->getType())) { | 4862 if (isVectorType(Dest->getType())) { |
4878 _redefined(_movp(Dest, SrcLegal), IsRedefinition); | 4863 _redefined(_movp(Dest, SrcLegal), IsRedefinition); |
4879 } else { | 4864 } else { |
4880 _redefined(_mov(Dest, SrcLegal), IsRedefinition); | 4865 _redefined(_mov(Dest, SrcLegal), IsRedefinition); |
4881 } | 4866 } |
4882 } | 4867 } |
4883 } | 4868 } |
4884 | 4869 |
4885 template <class Machine> | 4870 template <typename TraitsType> |
4886 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, | 4871 bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect( |
4887 const InstSelect *Select) { | 4872 const InstFcmp *Fcmp, const InstSelect *Select) { |
4888 Operand *CmpSrc0 = Fcmp->getSrc(0); | 4873 Operand *CmpSrc0 = Fcmp->getSrc(0); |
4889 Operand *CmpSrc1 = Fcmp->getSrc(1); | 4874 Operand *CmpSrc1 = Fcmp->getSrc(1); |
4890 Operand *SelectSrcT = Select->getTrueOperand(); | 4875 Operand *SelectSrcT = Select->getTrueOperand(); |
4891 Operand *SelectSrcF = Select->getFalseOperand(); | 4876 Operand *SelectSrcF = Select->getFalseOperand(); |
4892 | 4877 |
4893 if (CmpSrc0->getType() != SelectSrcT->getType()) | 4878 if (CmpSrc0->getType() != SelectSrcT->getType()) |
4894 return false; | 4879 return false; |
4895 | 4880 |
4896 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. | 4881 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. |
4897 InstFcmp::FCond Condition = Fcmp->getCondition(); | 4882 InstFcmp::FCond Condition = Fcmp->getCondition(); |
4898 switch (Condition) { | 4883 switch (Condition) { |
4899 default: | 4884 default: |
4900 return false; | 4885 return false; |
4901 case InstFcmp::True: | 4886 case InstFcmp::True: |
4902 case InstFcmp::False: | 4887 case InstFcmp::False: |
4903 case InstFcmp::Ogt: | 4888 case InstFcmp::Ogt: |
4904 case InstFcmp::Olt: | 4889 case InstFcmp::Olt: |
4905 (void)CmpSrc0; | 4890 (void)CmpSrc0; |
4906 (void)CmpSrc1; | 4891 (void)CmpSrc1; |
4907 (void)SelectSrcT; | 4892 (void)SelectSrcT; |
4908 (void)SelectSrcF; | 4893 (void)SelectSrcF; |
4909 break; | 4894 break; |
4910 } | 4895 } |
4911 return false; | 4896 return false; |
4912 } | 4897 } |
4913 | 4898 |
4914 template <class Machine> | 4899 template <typename TraitsType> |
4915 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) { | 4900 void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) { |
4916 Variable *Dest = Icmp->getDest(); | 4901 Variable *Dest = Icmp->getDest(); |
4917 if (isVectorType(Dest->getType())) { | 4902 if (isVectorType(Dest->getType())) { |
4918 lowerIcmpVector(Icmp); | 4903 lowerIcmpVector(Icmp); |
4919 } else { | 4904 } else { |
4920 constexpr Inst *Consumer = nullptr; | 4905 constexpr Inst *Consumer = nullptr; |
4921 lowerIcmpAndConsumer(Icmp, Consumer); | 4906 lowerIcmpAndConsumer(Icmp, Consumer); |
4922 } | 4907 } |
4923 } | 4908 } |
4924 | 4909 |
4925 template <class Machine> | 4910 template <typename TraitsType> |
4926 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) { | 4911 void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Inst) { |
4927 Variable *Dest = Inst->getDest(); | 4912 Variable *Dest = Inst->getDest(); |
4928 Type DestTy = Dest->getType(); | 4913 Type DestTy = Dest->getType(); |
4929 Operand *SrcT = Inst->getTrueOperand(); | 4914 Operand *SrcT = Inst->getTrueOperand(); |
4930 Operand *SrcF = Inst->getFalseOperand(); | 4915 Operand *SrcF = Inst->getFalseOperand(); |
4931 Operand *Condition = Inst->getCondition(); | 4916 Operand *Condition = Inst->getCondition(); |
4932 | 4917 |
4933 if (!isVectorType(DestTy)) | 4918 if (!isVectorType(DestTy)) |
4934 llvm::report_fatal_error("Expected a vector select"); | 4919 llvm::report_fatal_error("Expected a vector select"); |
4935 | 4920 |
4936 Type SrcTy = SrcT->getType(); | 4921 Type SrcTy = SrcT->getType(); |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4983 } | 4968 } |
4984 _movp(T2, T); | 4969 _movp(T2, T); |
4985 _pand(T, SrcTRM); | 4970 _pand(T, SrcTRM); |
4986 _pandn(T2, SrcFRM); | 4971 _pandn(T2, SrcFRM); |
4987 _por(T, T2); | 4972 _por(T, T2); |
4988 _movp(Dest, T); | 4973 _movp(Dest, T); |
4989 | 4974 |
4990 return; | 4975 return; |
4991 } | 4976 } |
4992 | 4977 |
4993 template <class Machine> | 4978 template <typename TraitsType> |
4994 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | 4979 void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) { |
4995 Operand *Value = Inst->getData(); | 4980 Operand *Value = Inst->getData(); |
4996 Operand *Addr = Inst->getAddr(); | 4981 Operand *Addr = Inst->getAddr(); |
4997 typename Traits::X86OperandMem *NewAddr = | 4982 X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
4998 formMemoryOperand(Addr, Value->getType()); | |
4999 doMockBoundsCheck(NewAddr); | 4983 doMockBoundsCheck(NewAddr); |
5000 Type Ty = NewAddr->getType(); | 4984 Type Ty = NewAddr->getType(); |
5001 | 4985 |
5002 if (!Traits::Is64Bit && Ty == IceType_i64) { | 4986 if (!Traits::Is64Bit && Ty == IceType_i64) { |
5003 Value = legalizeUndef(Value); | 4987 Value = legalizeUndef(Value); |
5004 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4988 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
5005 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4989 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
5006 _store(ValueHi, | 4990 _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr))); |
5007 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); | 4991 _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr))); |
5008 _store(ValueLo, | |
5009 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); | |
5010 } else if (isVectorType(Ty)) { | 4992 } else if (isVectorType(Ty)) { |
5011 _storep(legalizeToReg(Value), NewAddr); | 4993 _storep(legalizeToReg(Value), NewAddr); |
5012 } else { | 4994 } else { |
5013 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4995 Value = legalize(Value, Legal_Reg | Legal_Imm); |
5014 _store(Value, NewAddr); | 4996 _store(Value, NewAddr); |
5015 } | 4997 } |
5016 } | 4998 } |
5017 | 4999 |
5018 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { | 5000 template <typename TraitsType> |
5001 void TargetX86Base<TraitsType>::doAddressOptStore() { | |
5019 auto *Inst = llvm::cast<InstStore>(Context.getCur()); | 5002 auto *Inst = llvm::cast<InstStore>(Context.getCur()); |
5020 Operand *Data = Inst->getData(); | 5003 Operand *Data = Inst->getData(); |
5021 Operand *Addr = Inst->getAddr(); | 5004 Operand *Addr = Inst->getAddr(); |
5022 Variable *Index = nullptr; | 5005 Variable *Index = nullptr; |
5023 ConstantRelocatable *Relocatable = nullptr; | 5006 ConstantRelocatable *Relocatable = nullptr; |
5024 uint16_t Shift = 0; | 5007 uint16_t Shift = 0; |
5025 int32_t Offset = 0; | 5008 int32_t Offset = 0; |
5026 auto *Base = llvm::dyn_cast<Variable>(Addr); | 5009 auto *Base = llvm::dyn_cast<Variable>(Addr); |
5027 // Vanilla ICE store instructions should not use the segment registers, and | 5010 // Vanilla ICE store instructions should not use the segment registers, and |
5028 // computeAddressOpt only works at the level of Variables and Constants, not | 5011 // computeAddressOpt only works at the level of Variables and Constants, not |
5029 // other Traits::X86OperandMem, so there should be no mention of segment | 5012 // other X86OperandMem, so there should be no mention of segment |
5030 // registers there either. | 5013 // registers there either. |
5031 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = | 5014 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment; |
5032 Traits::X86OperandMem::DefaultSegment; | |
5033 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { | 5015 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { |
5034 Inst->setDeleted(); | 5016 Inst->setDeleted(); |
5035 Constant *OffsetOp = nullptr; | 5017 Constant *OffsetOp = nullptr; |
5036 if (Relocatable == nullptr) { | 5018 if (Relocatable == nullptr) { |
5037 OffsetOp = Ctx->getConstantInt32(Offset); | 5019 OffsetOp = Ctx->getConstantInt32(Offset); |
5038 } else { | 5020 } else { |
5039 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, | 5021 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, |
5040 Relocatable->getName(), | 5022 Relocatable->getName(), |
5041 Relocatable->getSuppressMangling()); | 5023 Relocatable->getSuppressMangling()); |
5042 } | 5024 } |
5043 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, | 5025 Addr = X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, Index, |
5044 Index, Shift, SegmentReg); | 5026 Shift, SegmentReg); |
5045 auto *NewStore = Context.insert<InstStore>(Data, Addr); | 5027 auto *NewStore = Context.insert<InstStore>(Data, Addr); |
5046 if (Inst->getDest()) | 5028 if (Inst->getDest()) |
5047 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 5029 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
5048 } | 5030 } |
5049 } | 5031 } |
5050 | 5032 |
5051 template <class Machine> | 5033 template <typename TraitsType> |
5052 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 5034 Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison, |
5053 uint64_t Min, uint64_t Max) { | 5035 uint64_t Min, uint64_t Max) { |
5054 // TODO(ascull): 64-bit should not reach here but only because it is not | 5036 // TODO(ascull): 64-bit should not reach here but only because it is not |
5055 // implemented yet. This should be able to handle the 64-bit case. | 5037 // implemented yet. This should be able to handle the 64-bit case. |
5056 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); | 5038 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
5057 // Subtracting 0 is a nop so don't do it | 5039 // Subtracting 0 is a nop so don't do it |
5058 if (Min != 0) { | 5040 if (Min != 0) { |
5059 // Avoid clobbering the comparison by copying it | 5041 // Avoid clobbering the comparison by copying it |
5060 Variable *T = nullptr; | 5042 Variable *T = nullptr; |
5061 _mov(T, Comparison); | 5043 _mov(T, Comparison); |
5062 _sub(T, Ctx->getConstantInt32(Min)); | 5044 _sub(T, Ctx->getConstantInt32(Min)); |
5063 Comparison = T; | 5045 Comparison = T; |
5064 } | 5046 } |
5065 | 5047 |
5066 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 5048 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
5067 | 5049 |
5068 return Comparison; | 5050 return Comparison; |
5069 } | 5051 } |
5070 | 5052 |
5071 template <class Machine> | 5053 template <typename TraitsType> |
5072 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, | 5054 void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case, |
5073 Operand *Comparison, bool DoneCmp, | 5055 Operand *Comparison, |
5074 CfgNode *DefaultTarget) { | 5056 bool DoneCmp, |
5057 CfgNode *DefaultTarget) { | |
5075 switch (Case.getKind()) { | 5058 switch (Case.getKind()) { |
5076 case CaseCluster::JumpTable: { | 5059 case CaseCluster::JumpTable: { |
5077 typename Traits::Insts::Label *SkipJumpTable; | 5060 InstX86Label *SkipJumpTable; |
5078 | 5061 |
5079 Operand *RangeIndex = | 5062 Operand *RangeIndex = |
5080 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 5063 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
5081 if (DefaultTarget == nullptr) { | 5064 if (DefaultTarget == nullptr) { |
5082 // Skip over jump table logic if comparison not in range and no default | 5065 // Skip over jump table logic if comparison not in range and no default |
5083 SkipJumpTable = Traits::Insts::Label::create(Func, this); | 5066 SkipJumpTable = InstX86Label::create(Func, this); |
5084 _br(Traits::Cond::Br_a, SkipJumpTable); | 5067 _br(Traits::Cond::Br_a, SkipJumpTable); |
5085 } else { | 5068 } else { |
5086 _br(Traits::Cond::Br_a, DefaultTarget); | 5069 _br(Traits::Cond::Br_a, DefaultTarget); |
5087 } | 5070 } |
5088 | 5071 |
5089 InstJumpTable *JumpTable = Case.getJumpTable(); | 5072 InstJumpTable *JumpTable = Case.getJumpTable(); |
5090 Context.insert(JumpTable); | 5073 Context.insert(JumpTable); |
5091 | 5074 |
5092 // Make sure the index is a register of the same width as the base | 5075 // Make sure the index is a register of the same width as the base |
5093 Variable *Index; | 5076 Variable *Index; |
5094 if (RangeIndex->getType() != getPointerType()) { | 5077 if (RangeIndex->getType() != getPointerType()) { |
5095 Index = makeReg(getPointerType()); | 5078 Index = makeReg(getPointerType()); |
5096 _movzx(Index, RangeIndex); | 5079 _movzx(Index, RangeIndex); |
5097 } else { | 5080 } else { |
5098 Index = legalizeToReg(RangeIndex); | 5081 Index = legalizeToReg(RangeIndex); |
5099 } | 5082 } |
5100 | 5083 |
5101 constexpr RelocOffsetT RelocOffset = 0; | 5084 constexpr RelocOffsetT RelocOffset = 0; |
5102 constexpr bool SuppressMangling = true; | 5085 constexpr bool SuppressMangling = true; |
5103 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); | 5086 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
5104 Constant *Base = Ctx->getConstantSym( | 5087 Constant *Base = Ctx->getConstantSym( |
5105 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), | 5088 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), |
5106 SuppressMangling); | 5089 SuppressMangling); |
5107 Constant *Offset = nullptr; | 5090 Constant *Offset = nullptr; |
5108 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); | 5091 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); |
5109 // TODO(ascull): remove need for legalize by allowing null base in memop | 5092 // TODO(ascull): remove need for legalize by allowing null base in memop |
5110 auto *TargetInMemory = Traits::X86OperandMem::create( | 5093 auto *TargetInMemory = X86OperandMem::create( |
5111 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); | 5094 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); |
5112 Variable *Target = nullptr; | 5095 Variable *Target = nullptr; |
5113 _mov(Target, TargetInMemory); | 5096 _mov(Target, TargetInMemory); |
5114 lowerIndirectJump(Target); | 5097 lowerIndirectJump(Target); |
5115 | 5098 |
5116 if (DefaultTarget == nullptr) | 5099 if (DefaultTarget == nullptr) |
5117 Context.insert(SkipJumpTable); | 5100 Context.insert(SkipJumpTable); |
5118 return; | 5101 return; |
5119 } | 5102 } |
5120 case CaseCluster::Range: { | 5103 case CaseCluster::Range: { |
(...skipping 15 matching lines...) Expand all Loading... | |
5136 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 5119 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
5137 _br(Traits::Cond::Br_be, Case.getTarget()); | 5120 _br(Traits::Cond::Br_be, Case.getTarget()); |
5138 } | 5121 } |
5139 if (DefaultTarget != nullptr) | 5122 if (DefaultTarget != nullptr) |
5140 _br(DefaultTarget); | 5123 _br(DefaultTarget); |
5141 return; | 5124 return; |
5142 } | 5125 } |
5143 } | 5126 } |
5144 } | 5127 } |
5145 | 5128 |
5146 template <class Machine> | 5129 template <typename TraitsType> |
5147 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 5130 void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Inst) { |
5148 // Group cases together and navigate through them with a binary search | 5131 // Group cases together and navigate through them with a binary search |
5149 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 5132 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
5150 Operand *Src0 = Inst->getComparison(); | 5133 Operand *Src0 = Inst->getComparison(); |
5151 CfgNode *DefaultTarget = Inst->getLabelDefault(); | 5134 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
5152 | 5135 |
5153 assert(CaseClusters.size() != 0); // Should always be at least one | 5136 assert(CaseClusters.size() != 0); // Should always be at least one |
5154 | 5137 |
5155 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { | 5138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
5156 Src0 = legalize(Src0); // get Base/Index into physical registers | 5139 Src0 = legalize(Src0); // get Base/Index into physical registers |
5157 Operand *Src0Lo = loOperand(Src0); | 5140 Operand *Src0Lo = loOperand(Src0); |
5158 Operand *Src0Hi = hiOperand(Src0); | 5141 Operand *Src0Hi = hiOperand(Src0); |
5159 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 5142 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
5160 // TODO(ascull): handle 64-bit case properly (currently naive version) | 5143 // TODO(ascull): handle 64-bit case properly (currently naive version) |
5161 // This might be handled by a higher level lowering of switches. | 5144 // This might be handled by a higher level lowering of switches. |
5162 SizeT NumCases = Inst->getNumCases(); | 5145 SizeT NumCases = Inst->getNumCases(); |
5163 if (NumCases >= 2) { | 5146 if (NumCases >= 2) { |
5164 Src0Lo = legalizeToReg(Src0Lo); | 5147 Src0Lo = legalizeToReg(Src0Lo); |
5165 Src0Hi = legalizeToReg(Src0Hi); | 5148 Src0Hi = legalizeToReg(Src0Hi); |
5166 } else { | 5149 } else { |
5167 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | 5150 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); |
5168 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 5151 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
5169 } | 5152 } |
5170 for (SizeT I = 0; I < NumCases; ++I) { | 5153 for (SizeT I = 0; I < NumCases; ++I) { |
5171 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | 5154 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); |
5172 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | 5155 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); |
5173 typename Traits::Insts::Label *Label = | 5156 InstX86Label *Label = InstX86Label::create(Func, this); |
5174 Traits::Insts::Label::create(Func, this); | |
5175 _cmp(Src0Lo, ValueLo); | 5157 _cmp(Src0Lo, ValueLo); |
5176 _br(Traits::Cond::Br_ne, Label); | 5158 _br(Traits::Cond::Br_ne, Label); |
5177 _cmp(Src0Hi, ValueHi); | 5159 _cmp(Src0Hi, ValueHi); |
5178 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | 5160 _br(Traits::Cond::Br_e, Inst->getLabel(I)); |
5179 Context.insert(Label); | 5161 Context.insert(Label); |
5180 } | 5162 } |
5181 _br(Inst->getLabelDefault()); | 5163 _br(Inst->getLabelDefault()); |
5182 return; | 5164 return; |
5183 } else { | 5165 } else { |
5184 // All the values are 32-bit so just check the operand is too and then | 5166 // All the values are 32-bit so just check the operand is too and then |
(...skipping 14 matching lines...) Expand all Loading... | |
5199 constexpr bool DoneCmp = false; | 5181 constexpr bool DoneCmp = false; |
5200 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); | 5182 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); |
5201 return; | 5183 return; |
5202 } | 5184 } |
5203 | 5185 |
5204 // Going to be using multiple times so get it in a register early | 5186 // Going to be using multiple times so get it in a register early |
5205 Variable *Comparison = legalizeToReg(Src0); | 5187 Variable *Comparison = legalizeToReg(Src0); |
5206 | 5188 |
5207 // A span is over the clusters | 5189 // A span is over the clusters |
5208 struct SearchSpan { | 5190 struct SearchSpan { |
5209 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) | 5191 SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label) |
5210 : Begin(Begin), Size(Size), Label(Label) {} | 5192 : Begin(Begin), Size(Size), Label(Label) {} |
5211 | 5193 |
5212 SizeT Begin; | 5194 SizeT Begin; |
5213 SizeT Size; | 5195 SizeT Size; |
5214 typename Traits::Insts::Label *Label; | 5196 InstX86Label *Label; |
5215 }; | 5197 }; |
5216 // The stack will only grow to the height of the tree so 12 should be plenty | 5198 // The stack will only grow to the height of the tree so 12 should be plenty |
5217 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack; | 5199 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack; |
5218 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr); | 5200 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr); |
5219 bool DoneCmp = false; | 5201 bool DoneCmp = false; |
5220 | 5202 |
5221 while (!SearchSpanStack.empty()) { | 5203 while (!SearchSpanStack.empty()) { |
5222 SearchSpan Span = SearchSpanStack.top(); | 5204 SearchSpan Span = SearchSpanStack.top(); |
5223 SearchSpanStack.pop(); | 5205 SearchSpanStack.pop(); |
5224 | 5206 |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5258 DoneCmp = false; | 5240 DoneCmp = false; |
5259 lowerCaseCluster(*CaseB, Comparison, DoneCmp, | 5241 lowerCaseCluster(*CaseB, Comparison, DoneCmp, |
5260 SearchSpanStack.empty() ? nullptr : DefaultTarget); | 5242 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
5261 } break; | 5243 } break; |
5262 | 5244 |
5263 default: | 5245 default: |
5264 // Pick the middle item and branch b or ae | 5246 // Pick the middle item and branch b or ae |
5265 SizeT PivotIndex = Span.Begin + (Span.Size / 2); | 5247 SizeT PivotIndex = Span.Begin + (Span.Size / 2); |
5266 const CaseCluster &Pivot = CaseClusters[PivotIndex]; | 5248 const CaseCluster &Pivot = CaseClusters[PivotIndex]; |
5267 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); | 5249 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); |
5268 typename Traits::Insts::Label *Label = | 5250 InstX86Label *Label = InstX86Label::create(Func, this); |
5269 Traits::Insts::Label::create(Func, this); | |
5270 _cmp(Comparison, Value); | 5251 _cmp(Comparison, Value); |
5271 // TODO(ascull): does it always have to be far? | 5252 // TODO(ascull): does it always have to be far? |
5272 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); | 5253 _br(Traits::Cond::Br_b, Label, InstX86Br::Far); |
5273 // Lower the left and (pivot+right) sides, falling through to the right | 5254 // Lower the left and (pivot+right) sides, falling through to the right |
5274 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); | 5255 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); |
5275 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); | 5256 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); |
5276 DoneCmp = true; | 5257 DoneCmp = true; |
5277 break; | 5258 break; |
5278 } | 5259 } |
5279 } | 5260 } |
5280 | 5261 |
5281 _br(DefaultTarget); | 5262 _br(DefaultTarget); |
5282 } | 5263 } |
5283 | 5264 |
5284 template <class Machine> | 5265 template <typename TraitsType> |
5285 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 5266 void TargetX86Base<TraitsType>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
5286 Variable *Dest, Operand *Src0, | 5267 Variable *Dest, |
5287 Operand *Src1) { | 5268 Operand *Src0, |
5269 Operand *Src1) { | |
5288 assert(isVectorType(Dest->getType())); | 5270 assert(isVectorType(Dest->getType())); |
5289 Type Ty = Dest->getType(); | 5271 Type Ty = Dest->getType(); |
5290 Type ElementTy = typeElementType(Ty); | 5272 Type ElementTy = typeElementType(Ty); |
5291 SizeT NumElements = typeNumElements(Ty); | 5273 SizeT NumElements = typeNumElements(Ty); |
5292 | 5274 |
5293 Operand *T = Ctx->getConstantUndef(Ty); | 5275 Operand *T = Ctx->getConstantUndef(Ty); |
5294 for (SizeT I = 0; I < NumElements; ++I) { | 5276 for (SizeT I = 0; I < NumElements; ++I) { |
5295 Constant *Index = Ctx->getConstantInt32(I); | 5277 Constant *Index = Ctx->getConstantInt32(I); |
5296 | 5278 |
5297 // Extract the next two inputs. | 5279 // Extract the next two inputs. |
(...skipping 18 matching lines...) Expand all Loading... | |
5316 } | 5298 } |
5317 | 5299 |
5318 /// The following pattern occurs often in lowered C and C++ code: | 5300 /// The following pattern occurs often in lowered C and C++ code: |
5319 /// | 5301 /// |
5320 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 5302 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
5321 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 5303 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
5322 /// | 5304 /// |
5323 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 5305 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
5324 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the | 5306 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
5325 /// sext operation. | 5307 /// sext operation. |
5326 template <class Machine> | 5308 template <typename TraitsType> |
5327 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 5309 void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction( |
5328 Variable *SignExtendedResult) { | 5310 Variable *SignExtendedResult) { |
5329 if (auto *NextCast = | 5311 if (auto *NextCast = |
5330 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 5312 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
5331 if (NextCast->getCastKind() == InstCast::Sext && | 5313 if (NextCast->getCastKind() == InstCast::Sext && |
5332 NextCast->getSrc(0) == SignExtendedResult) { | 5314 NextCast->getSrc(0) == SignExtendedResult) { |
5333 NextCast->setDeleted(); | 5315 NextCast->setDeleted(); |
5334 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); | 5316 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); |
5335 // Skip over the instruction. | 5317 // Skip over the instruction. |
5336 Context.advanceNext(); | 5318 Context.advanceNext(); |
5337 } | 5319 } |
5338 } | 5320 } |
5339 } | 5321 } |
5340 | 5322 |
5341 template <class Machine> | 5323 template <typename TraitsType> |
5342 void TargetX86Base<Machine>::lowerUnreachable( | 5324 void TargetX86Base<TraitsType>::lowerUnreachable( |
5343 const InstUnreachable * /*Inst*/) { | 5325 const InstUnreachable * /*Inst*/) { |
5344 _ud2(); | 5326 _ud2(); |
5345 // Add a fake use of esp to make sure esp adjustments after the unreachable | 5327 // Add a fake use of esp to make sure esp adjustments after the unreachable |
5346 // do not get dead-code eliminated. | 5328 // do not get dead-code eliminated. |
5347 keepEspLiveAtExit(); | 5329 keepEspLiveAtExit(); |
5348 } | 5330 } |
5349 | 5331 |
5350 template <class Machine> | 5332 template <typename TraitsType> |
5351 void TargetX86Base<Machine>::lowerRMW( | 5333 void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) { |
5352 const typename Traits::Insts::FakeRMW *RMW) { | |
5353 // If the beacon variable's live range does not end in this instruction, then | 5334 // If the beacon variable's live range does not end in this instruction, then |
5354 // it must end in the modified Store instruction that follows. This means | 5335 // it must end in the modified Store instruction that follows. This means |
5355 // that the original Store instruction is still there, either because the | 5336 // that the original Store instruction is still there, either because the |
5356 // value being stored is used beyond the Store instruction, or because dead | 5337 // value being stored is used beyond the Store instruction, or because dead |
5357 // code elimination did not happen. In either case, we cancel RMW lowering | 5338 // code elimination did not happen. In either case, we cancel RMW lowering |
5358 // (and the caller deletes the RMW instruction). | 5339 // (and the caller deletes the RMW instruction). |
5359 if (!RMW->isLastUse(RMW->getBeacon())) | 5340 if (!RMW->isLastUse(RMW->getBeacon())) |
5360 return; | 5341 return; |
5361 Operand *Src = RMW->getData(); | 5342 Operand *Src = RMW->getData(); |
5362 Type Ty = Src->getType(); | 5343 Type Ty = Src->getType(); |
5363 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | 5344 X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
5364 doMockBoundsCheck(Addr); | 5345 doMockBoundsCheck(Addr); |
5365 if (!Traits::Is64Bit && Ty == IceType_i64) { | 5346 if (!Traits::Is64Bit && Ty == IceType_i64) { |
5366 Src = legalizeUndef(Src); | 5347 Src = legalizeUndef(Src); |
5367 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | 5348 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); |
5368 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | 5349 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); |
5369 typename Traits::X86OperandMem *AddrLo = | 5350 X86OperandMem *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr)); |
5370 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); | 5351 X86OperandMem *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr)); |
5371 typename Traits::X86OperandMem *AddrHi = | |
5372 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); | |
5373 switch (RMW->getOp()) { | 5352 switch (RMW->getOp()) { |
5374 default: | 5353 default: |
5375 // TODO(stichnot): Implement other arithmetic operators. | 5354 // TODO(stichnot): Implement other arithmetic operators. |
5376 break; | 5355 break; |
5377 case InstArithmetic::Add: | 5356 case InstArithmetic::Add: |
5378 _add_rmw(AddrLo, SrcLo); | 5357 _add_rmw(AddrLo, SrcLo); |
5379 _adc_rmw(AddrHi, SrcHi); | 5358 _adc_rmw(AddrHi, SrcHi); |
5380 return; | 5359 return; |
5381 case InstArithmetic::Sub: | 5360 case InstArithmetic::Sub: |
5382 _sub_rmw(AddrLo, SrcLo); | 5361 _sub_rmw(AddrLo, SrcLo); |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5420 return; | 5399 return; |
5421 case InstArithmetic::Xor: | 5400 case InstArithmetic::Xor: |
5422 Src = legalize(Src, Legal_Reg | Legal_Imm); | 5401 Src = legalize(Src, Legal_Reg | Legal_Imm); |
5423 _xor_rmw(Addr, Src); | 5402 _xor_rmw(Addr, Src); |
5424 return; | 5403 return; |
5425 } | 5404 } |
5426 } | 5405 } |
5427 llvm::report_fatal_error("Couldn't lower RMW instruction"); | 5406 llvm::report_fatal_error("Couldn't lower RMW instruction"); |
5428 } | 5407 } |
5429 | 5408 |
5430 template <class Machine> | 5409 template <typename TraitsType> |
5431 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 5410 void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) { |
5432 if (const auto *RMW = | 5411 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) { |
5433 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) { | |
5434 lowerRMW(RMW); | 5412 lowerRMW(RMW); |
5435 } else { | 5413 } else { |
5436 TargetLowering::lowerOther(Instr); | 5414 TargetLowering::lowerOther(Instr); |
5437 } | 5415 } |
5438 } | 5416 } |
5439 | 5417 |
5440 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve | 5418 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve |
5441 /// integrity of liveness analysis. Undef values are also turned into zeroes, | 5419 /// integrity of liveness analysis. Undef values are also turned into zeroes, |
5442 /// since loOperand() and hiOperand() don't expect Undef input. | 5420 /// since loOperand() and hiOperand() don't expect Undef input. |
5443 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 5421 template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() { |
5444 if (Traits::Is64Bit) { | 5422 if (Traits::Is64Bit) { |
5445 // On x86-64 we don't need to prelower phis -- the architecture can handle | 5423 // On x86-64 we don't need to prelower phis -- the architecture can handle |
5446 // 64-bit integer natively. | 5424 // 64-bit integer natively. |
5447 return; | 5425 return; |
5448 } | 5426 } |
5449 | 5427 |
5450 // Pause constant blinding or pooling, blinding or pooling will be done later | 5428 // Pause constant blinding or pooling, blinding or pooling will be done later |
5451 // during phi lowering assignments | 5429 // during phi lowering assignments |
5452 BoolFlagSaver B(RandomizationPoolingPaused, true); | 5430 BoolFlagSaver B(RandomizationPoolingPaused, true); |
5453 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 5431 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>( |
5454 this, Context.getNode(), Func); | 5432 this, Context.getNode(), Func); |
5455 } | 5433 } |
5456 | 5434 |
5457 template <class Machine> | 5435 template <typename TraitsType> |
5458 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) { | 5436 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { |
5459 uint32_t StackArgumentsSize = 0; | 5437 uint32_t StackArgumentsSize = 0; |
5460 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | 5438 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { |
5461 const char *HelperName = nullptr; | 5439 const char *HelperName = nullptr; |
5462 Variable *Dest = Arith->getDest(); | 5440 Variable *Dest = Arith->getDest(); |
5463 Type DestTy = Dest->getType(); | 5441 Type DestTy = Dest->getType(); |
5464 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 5442 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
5465 switch (Arith->getOp()) { | 5443 switch (Arith->getOp()) { |
5466 default: | 5444 default: |
5467 return; | 5445 return; |
5468 case InstArithmetic::Udiv: | 5446 case InstArithmetic::Udiv: |
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5673 if (!isScalarFloatingType(ReturnType)) | 5651 if (!isScalarFloatingType(ReturnType)) |
5674 return; | 5652 return; |
5675 StackArgumentsSize = typeWidthInBytes(ReturnType); | 5653 StackArgumentsSize = typeWidthInBytes(ReturnType); |
5676 } else { | 5654 } else { |
5677 return; | 5655 return; |
5678 } | 5656 } |
5679 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); | 5657 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); |
5680 updateMaxOutArgsSizeBytes(StackArgumentsSize); | 5658 updateMaxOutArgsSizeBytes(StackArgumentsSize); |
5681 } | 5659 } |
5682 | 5660 |
5683 template <class Machine> | 5661 template <typename TraitsType> |
5684 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes( | 5662 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes( |
5685 const std::vector<Type> &ArgTypes, Type ReturnType) { | 5663 const std::vector<Type> &ArgTypes, Type ReturnType) { |
5686 uint32_t OutArgumentsSizeBytes = 0; | 5664 uint32_t OutArgumentsSizeBytes = 0; |
5687 uint32_t XmmArgCount = 0; | 5665 uint32_t XmmArgCount = 0; |
5688 uint32_t GprArgCount = 0; | 5666 uint32_t GprArgCount = 0; |
5689 for (Type Ty : ArgTypes) { | 5667 for (Type Ty : ArgTypes) { |
5690 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 5668 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
5691 assert(typeWidthInBytes(Ty) >= 4); | 5669 assert(typeWidthInBytes(Ty) >= 4); |
5692 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { | 5670 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { |
5693 ++XmmArgCount; | 5671 ++XmmArgCount; |
5694 } else if (isScalarIntegerType(Ty) && | 5672 } else if (isScalarIntegerType(Ty) && |
(...skipping 13 matching lines...) Expand all Loading... | |
5708 // The 32 bit ABI requires floating point values to be returned on the x87 FP | 5686 // The 32 bit ABI requires floating point values to be returned on the x87 FP |
5709 // stack. Ensure there is enough space for the fstp/movs for floating returns. | 5687 // stack. Ensure there is enough space for the fstp/movs for floating returns. |
5710 if (isScalarFloatingType(ReturnType)) { | 5688 if (isScalarFloatingType(ReturnType)) { |
5711 OutArgumentsSizeBytes = | 5689 OutArgumentsSizeBytes = |
5712 std::max(OutArgumentsSizeBytes, | 5690 std::max(OutArgumentsSizeBytes, |
5713 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); | 5691 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); |
5714 } | 5692 } |
5715 return OutArgumentsSizeBytes; | 5693 return OutArgumentsSizeBytes; |
5716 } | 5694 } |
5717 | 5695 |
5718 template <class Machine> | 5696 template <typename TraitsType> |
5719 uint32_t | 5697 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes( |
5720 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { | 5698 const InstCall *Instr) { |
5721 // Build a vector of the arguments' types. | 5699 // Build a vector of the arguments' types. |
5722 std::vector<Type> ArgTypes; | 5700 std::vector<Type> ArgTypes; |
5723 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 5701 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
5724 Operand *Arg = Instr->getArg(i); | 5702 Operand *Arg = Instr->getArg(i); |
5725 ArgTypes.emplace_back(Arg->getType()); | 5703 ArgTypes.emplace_back(Arg->getType()); |
5726 } | 5704 } |
5727 // Compute the return type (if any); | 5705 // Compute the return type (if any); |
5728 Type ReturnType = IceType_void; | 5706 Type ReturnType = IceType_void; |
5729 Variable *Dest = Instr->getDest(); | 5707 Variable *Dest = Instr->getDest(); |
5730 if (Dest != nullptr) | 5708 if (Dest != nullptr) |
5731 ReturnType = Dest->getType(); | 5709 ReturnType = Dest->getType(); |
5732 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); | 5710 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); |
5733 } | 5711 } |
5734 | 5712 |
5735 template <class Machine> | 5713 template <typename TraitsType> |
5736 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { | 5714 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty, |
5715 int32_t RegNum) { | |
5737 Variable *Reg = makeReg(Ty, RegNum); | 5716 Variable *Reg = makeReg(Ty, RegNum); |
5738 switch (Ty) { | 5717 switch (Ty) { |
5739 case IceType_i1: | 5718 case IceType_i1: |
5740 case IceType_i8: | 5719 case IceType_i8: |
5741 case IceType_i16: | 5720 case IceType_i16: |
5742 case IceType_i32: | 5721 case IceType_i32: |
5743 case IceType_i64: | 5722 case IceType_i64: |
5744 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5723 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
5745 _mov(Reg, Ctx->getConstantZero(Ty)); | 5724 _mov(Reg, Ctx->getConstantZero(Ty)); |
5746 break; | 5725 break; |
(...skipping 12 matching lines...) Expand all Loading... | |
5759 return Reg; | 5738 return Reg; |
5760 } | 5739 } |
5761 | 5740 |
5762 // There is no support for loading or emitting vector constants, so the vector | 5741 // There is no support for loading or emitting vector constants, so the vector |
5763 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are | 5742 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
5764 // initialized with register operations. | 5743 // initialized with register operations. |
5765 // | 5744 // |
5766 // TODO(wala): Add limited support for vector constants so that complex | 5745 // TODO(wala): Add limited support for vector constants so that complex |
5767 // initialization in registers is unnecessary. | 5746 // initialization in registers is unnecessary. |
5768 | 5747 |
5769 template <class Machine> | 5748 template <typename TraitsType> |
5770 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5749 Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty, |
5750 int32_t RegNum) { | |
5771 return makeZeroedRegister(Ty, RegNum); | 5751 return makeZeroedRegister(Ty, RegNum); |
5772 } | 5752 } |
5773 | 5753 |
5774 template <class Machine> | 5754 template <typename TraitsType> |
5775 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | 5755 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
5776 int32_t RegNum) { | 5756 int32_t RegNum) { |
5777 Variable *MinusOnes = makeReg(Ty, RegNum); | 5757 Variable *MinusOnes = makeReg(Ty, RegNum); |
5778 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5758 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
5779 Context.insert<InstFakeDef>(MinusOnes); | 5759 Context.insert<InstFakeDef>(MinusOnes); |
5780 _pcmpeq(MinusOnes, MinusOnes); | 5760 _pcmpeq(MinusOnes, MinusOnes); |
5781 return MinusOnes; | 5761 return MinusOnes; |
5782 } | 5762 } |
5783 | 5763 |
5784 template <class Machine> | 5764 template <typename TraitsType> |
5785 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5765 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
5786 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5766 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
5787 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5767 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
5788 _psub(Dest, MinusOne); | 5768 _psub(Dest, MinusOne); |
5789 return Dest; | 5769 return Dest; |
5790 } | 5770 } |
5791 | 5771 |
5792 template <class Machine> | 5772 template <typename TraitsType> |
5793 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, | 5773 Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty, |
5794 int32_t RegNum) { | 5774 int32_t RegNum) { |
5795 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 5775 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
5796 Ty == IceType_v16i8); | 5776 Ty == IceType_v16i8); |
5797 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 5777 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
5798 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 5778 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
5799 SizeT Shift = | 5779 SizeT Shift = |
5800 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 5780 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
5801 _psll(Reg, Ctx->getConstantInt8(Shift)); | 5781 _psll(Reg, Ctx->getConstantInt8(Shift)); |
5802 return Reg; | 5782 return Reg; |
5803 } else { | 5783 } else { |
5804 // SSE has no left shift operation for vectors of 8 bit integers. | 5784 // SSE has no left shift operation for vectors of 8 bit integers. |
5805 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 5785 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
5806 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 5786 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
5807 Variable *Reg = makeReg(Ty, RegNum); | 5787 Variable *Reg = makeReg(Ty, RegNum); |
5808 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 5788 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
5809 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 5789 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
5810 return Reg; | 5790 return Reg; |
5811 } | 5791 } |
5812 } | 5792 } |
5813 | 5793 |
5814 /// Construct a mask in a register that can be and'ed with a floating-point | 5794 /// Construct a mask in a register that can be and'ed with a floating-point |
5815 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 5795 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
5816 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of | 5796 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
5817 /// ones logically right shifted one bit. | 5797 /// ones logically right shifted one bit. |
5818 // TODO(stichnot): Fix the wala | 5798 // TODO(stichnot): Fix the wala |
5819 // TODO: above, to represent vector constants in memory. | 5799 // TODO: above, to represent vector constants in memory. |
5820 template <class Machine> | 5800 template <typename TraitsType> |
5821 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 5801 Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty, |
5822 int32_t RegNum) { | 5802 int32_t RegNum) { |
5823 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 5803 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
5824 _psrl(Reg, Ctx->getConstantInt8(1)); | 5804 _psrl(Reg, Ctx->getConstantInt8(1)); |
5825 return Reg; | 5805 return Reg; |
5826 } | 5806 } |
5827 | 5807 |
5828 template <class Machine> | 5808 template <typename TraitsType> |
5829 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5809 typename TargetX86Base<TraitsType>::X86OperandMem * |
5830 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 5810 TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
5831 uint32_t Offset) { | 5811 uint32_t Offset) { |
5832 // Ensure that Loc is a stack slot. | 5812 // Ensure that Loc is a stack slot. |
5833 assert(Slot->mustNotHaveReg()); | 5813 assert(Slot->mustNotHaveReg()); |
5834 assert(Slot->getRegNum() == Variable::NoRegister); | 5814 assert(Slot->getRegNum() == Variable::NoRegister); |
5835 // Compute the location of Loc in memory. | 5815 // Compute the location of Loc in memory. |
5836 // TODO(wala,stichnot): lea should not | 5816 // TODO(wala,stichnot): lea should not |
5837 // be required. The address of the stack slot is known at compile time | 5817 // be required. The address of the stack slot is known at compile time |
5838 // (although not until after addProlog()). | 5818 // (although not until after addProlog()). |
5839 constexpr Type PointerType = IceType_i32; | 5819 constexpr Type PointerType = IceType_i32; |
5840 Variable *Loc = makeReg(PointerType); | 5820 Variable *Loc = makeReg(PointerType); |
5841 _lea(Loc, Slot); | 5821 _lea(Loc, Slot); |
5842 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5822 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
5843 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5823 return X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
5844 } | 5824 } |
5845 | 5825 |
5846 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. | 5826 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. |
5847 /// Src is assumed to already be legalized. If the source operand is known to | 5827 /// Src is assumed to already be legalized. If the source operand is known to |
5848 /// be a memory or immediate operand, a simple mov will suffice. But if the | 5828 /// be a memory or immediate operand, a simple mov will suffice. But if the |
5849 /// source operand can be a physical register, then it must first be copied into | 5829 /// source operand can be a physical register, then it must first be copied into |
5850 /// a physical register that is truncable to 8-bit, then truncated into a | 5830 /// a physical register that is truncable to 8-bit, then truncated into a |
5851 /// physical register that can receive a truncation, and finally copied into the | 5831 /// physical register that can receive a truncation, and finally copied into the |
5852 /// result 8-bit register (which in general can be any 8-bit register). For | 5832 /// result 8-bit register (which in general can be any 8-bit register). For |
5853 /// example, moving %ebp into %ah may be accomplished as: | 5833 /// example, moving %ebp into %ah may be accomplished as: |
(...skipping 10 matching lines...) Expand all Loading... | |
5864 /// Reg_ah. | 5844 /// Reg_ah. |
5865 /// | 5845 /// |
5866 /// Note #2. ConstantRelocatable operands are also put through this process | 5846 /// Note #2. ConstantRelocatable operands are also put through this process |
5867 /// (not truncated directly) because our ELF emitter does R_386_32 relocations | 5847 /// (not truncated directly) because our ELF emitter does R_386_32 relocations |
5868 /// but not R_386_8 relocations. | 5848 /// but not R_386_8 relocations. |
5869 /// | 5849 /// |
5870 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 | 5850 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 |
5871 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper | 5851 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper |
5872 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument | 5852 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument |
5873 /// to the pinsrb instruction. | 5853 /// to the pinsrb instruction. |
5874 template <class Machine> | 5854 template <typename TraitsType> |
5875 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) { | 5855 Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, int32_t RegNum) { |
5876 Type Ty = Src->getType(); | 5856 Type Ty = Src->getType(); |
5877 assert(isScalarIntegerType(Ty)); | 5857 assert(isScalarIntegerType(Ty)); |
5878 assert(Ty != IceType_i1); | 5858 assert(Ty != IceType_i1); |
5879 Variable *Reg = makeReg(IceType_i8, RegNum); | 5859 Variable *Reg = makeReg(IceType_i8, RegNum); |
5880 Reg->setRegClass(RCX86_IsTrunc8Rcvr); | 5860 Reg->setRegClass(RCX86_IsTrunc8Rcvr); |
5881 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { | 5861 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { |
5882 Variable *SrcTruncable = makeReg(Ty); | 5862 Variable *SrcTruncable = makeReg(Ty); |
5883 switch (Ty) { | 5863 switch (Ty) { |
5884 case IceType_i64: | 5864 case IceType_i64: |
5885 SrcTruncable->setRegClass(RCX86_Is64To8); | 5865 SrcTruncable->setRegClass(RCX86_Is64To8); |
(...skipping 13 matching lines...) Expand all Loading... | |
5899 _mov(SrcTruncable, Src); | 5879 _mov(SrcTruncable, Src); |
5900 _mov(SrcRcvr, SrcTruncable); | 5880 _mov(SrcRcvr, SrcTruncable); |
5901 Src = SrcRcvr; | 5881 Src = SrcRcvr; |
5902 } | 5882 } |
5903 _mov(Reg, Src); | 5883 _mov(Reg, Src); |
5904 return Reg; | 5884 return Reg; |
5905 } | 5885 } |
5906 | 5886 |
5907 /// Helper for legalize() to emit the right code to lower an operand to a | 5887 /// Helper for legalize() to emit the right code to lower an operand to a |
5908 /// register of the appropriate type. | 5888 /// register of the appropriate type. |
5909 template <class Machine> | 5889 template <typename TraitsType> |
5910 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5890 Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, int32_t RegNum) { |
5911 Type Ty = Src->getType(); | 5891 Type Ty = Src->getType(); |
5912 Variable *Reg = makeReg(Ty, RegNum); | 5892 Variable *Reg = makeReg(Ty, RegNum); |
5913 if (isVectorType(Ty)) { | 5893 if (isVectorType(Ty)) { |
5914 _movp(Reg, Src); | 5894 _movp(Reg, Src); |
5915 } else { | 5895 } else { |
5916 _mov(Reg, Src); | 5896 _mov(Reg, Src); |
5917 } | 5897 } |
5918 return Reg; | 5898 return Reg; |
5919 } | 5899 } |
5920 | 5900 |
5921 template <class Machine> | 5901 template <typename TraitsType> |
5922 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, | 5902 Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed, |
5923 int32_t RegNum) { | 5903 int32_t RegNum) { |
5924 Type Ty = From->getType(); | 5904 Type Ty = From->getType(); |
5925 // Assert that a physical register is allowed. To date, all calls to | 5905 // Assert that a physical register is allowed. To date, all calls to |
5926 // legalize() allow a physical register. If a physical register needs to be | 5906 // legalize() allow a physical register. If a physical register needs to be |
5927 // explicitly disallowed, then new code will need to be written to force a | 5907 // explicitly disallowed, then new code will need to be written to force a |
5928 // spill. | 5908 // spill. |
5929 assert(Allowed & Legal_Reg); | 5909 assert(Allowed & Legal_Reg); |
5930 // If we're asking for a specific physical register, make sure we're not | 5910 // If we're asking for a specific physical register, make sure we're not |
5931 // allowing any other operand kinds. (This could be future work, e.g. allow | 5911 // allowing any other operand kinds. (This could be future work, e.g. allow |
5932 // the shl shift amount to be either an immediate or in ecx.) | 5912 // the shl shift amount to be either an immediate or in ecx.) |
5933 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); | 5913 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); |
5934 | 5914 |
5935 // Substitute with an available infinite-weight variable if possible. Only do | 5915 // Substitute with an available infinite-weight variable if possible. Only do |
5936 // this when we are not asking for a specific register, and when the | 5916 // this when we are not asking for a specific register, and when the |
5937 // substitution is not locked to a specific register, and when the types | 5917 // substitution is not locked to a specific register, and when the types |
5938 // match, in order to capture the vast majority of opportunities and avoid | 5918 // match, in order to capture the vast majority of opportunities and avoid |
5939 // corner cases in the lowering. | 5919 // corner cases in the lowering. |
5940 if (RegNum == Variable::NoRegister) { | 5920 if (RegNum == Variable::NoRegister) { |
5941 if (Variable *Subst = getContext().availabilityGet(From)) { | 5921 if (Variable *Subst = getContext().availabilityGet(From)) { |
5942 // At this point we know there is a potential substitution available. | 5922 // At this point we know there is a potential substitution available. |
5943 if (Subst->mustHaveReg() && !Subst->hasReg()) { | 5923 if (Subst->mustHaveReg() && !Subst->hasReg()) { |
5944 // At this point we know the substitution will have a register. | 5924 // At this point we know the substitution will have a register. |
5945 if (From->getType() == Subst->getType()) { | 5925 if (From->getType() == Subst->getType()) { |
5946 // At this point we know the substitution's register is compatible. | 5926 // At this point we know the substitution's register is compatible. |
5947 return Subst; | 5927 return Subst; |
5948 } | 5928 } |
5949 } | 5929 } |
5950 } | 5930 } |
5951 } | 5931 } |
5952 | 5932 |
5953 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5933 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) { |
5954 // Before doing anything with a Mem operand, we need to ensure that the | 5934 // Before doing anything with a Mem operand, we need to ensure that the |
5955 // Base and Index components are in physical registers. | 5935 // Base and Index components are in physical registers. |
5956 Variable *Base = Mem->getBase(); | 5936 Variable *Base = Mem->getBase(); |
5957 Variable *Index = Mem->getIndex(); | 5937 Variable *Index = Mem->getIndex(); |
5958 Variable *RegBase = nullptr; | 5938 Variable *RegBase = nullptr; |
5959 Variable *RegIndex = nullptr; | 5939 Variable *RegIndex = nullptr; |
5960 if (Base) { | 5940 if (Base) { |
5961 RegBase = llvm::cast<Variable>( | 5941 RegBase = llvm::cast<Variable>( |
5962 legalize(Base, Legal_Reg | Legal_Rematerializable)); | 5942 legalize(Base, Legal_Reg | Legal_Rematerializable)); |
5963 } | 5943 } |
5964 if (Index) { | 5944 if (Index) { |
5965 RegIndex = llvm::cast<Variable>( | 5945 RegIndex = llvm::cast<Variable>( |
5966 legalize(Index, Legal_Reg | Legal_Rematerializable)); | 5946 legalize(Index, Legal_Reg | Legal_Rematerializable)); |
5967 } | 5947 } |
5968 if (Base != RegBase || Index != RegIndex) { | 5948 if (Base != RegBase || Index != RegIndex) { |
5969 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), | 5949 Mem = X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex, |
5970 RegIndex, Mem->getShift(), | 5950 Mem->getShift(), Mem->getSegmentRegister()); |
5971 Mem->getSegmentRegister()); | |
5972 } | 5951 } |
5973 | 5952 |
5974 // For all Memory Operands, we do randomization/pooling here | 5953 // For all Memory Operands, we do randomization/pooling here |
5975 From = randomizeOrPoolImmediate(Mem); | 5954 From = randomizeOrPoolImmediate(Mem); |
5976 | 5955 |
5977 if (!(Allowed & Legal_Mem)) { | 5956 if (!(Allowed & Legal_Mem)) { |
5978 From = copyToReg(From, RegNum); | 5957 From = copyToReg(From, RegNum); |
5979 } | 5958 } |
5980 return From; | 5959 return From; |
5981 } | 5960 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6018 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { | 5997 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { |
6019 if (Utils::isPositiveZero(ConstDouble->getValue())) | 5998 if (Utils::isPositiveZero(ConstDouble->getValue())) |
6020 return makeZeroedRegister(Ty, RegNum); | 5999 return makeZeroedRegister(Ty, RegNum); |
6021 } | 6000 } |
6022 Variable *Base = nullptr; | 6001 Variable *Base = nullptr; |
6023 std::string Buffer; | 6002 std::string Buffer; |
6024 llvm::raw_string_ostream StrBuf(Buffer); | 6003 llvm::raw_string_ostream StrBuf(Buffer); |
6025 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 6004 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
6026 llvm::cast<Constant>(From)->setShouldBePooled(true); | 6005 llvm::cast<Constant>(From)->setShouldBePooled(true); |
6027 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 6006 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
6028 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 6007 From = X86OperandMem::create(Func, Ty, Base, Offset); |
6029 } | 6008 } |
6030 bool NeedsReg = false; | 6009 bool NeedsReg = false; |
6031 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) | 6010 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) |
6032 // Immediate specifically not allowed | 6011 // Immediate specifically not allowed |
6033 NeedsReg = true; | 6012 NeedsReg = true; |
6034 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 6013 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
6035 // On x86, FP constants are lowered to mem operands. | 6014 // On x86, FP constants are lowered to mem operands. |
6036 NeedsReg = true; | 6015 NeedsReg = true; |
6037 if (NeedsReg) { | 6016 if (NeedsReg) { |
6038 From = copyToReg(From, RegNum); | 6017 From = copyToReg(From, RegNum); |
(...skipping 11 matching lines...) Expand all Loading... | |
6050 // - Mem is not allowed and Var isn't guaranteed a physical register, or | 6029 // - Mem is not allowed and Var isn't guaranteed a physical register, or |
6051 // - RegNum is required and Var->getRegNum() doesn't match, or | 6030 // - RegNum is required and Var->getRegNum() doesn't match, or |
6052 // - Var is a rematerializable variable and rematerializable pass-through is | 6031 // - Var is a rematerializable variable and rematerializable pass-through is |
6053 // not allowed (in which case we need an lea instruction). | 6032 // not allowed (in which case we need an lea instruction). |
6054 if (MustRematerialize) { | 6033 if (MustRematerialize) { |
6055 assert(Ty == IceType_i32); | 6034 assert(Ty == IceType_i32); |
6056 Variable *NewVar = makeReg(Ty, RegNum); | 6035 Variable *NewVar = makeReg(Ty, RegNum); |
6057 // Since Var is rematerializable, the offset will be added when the lea is | 6036 // Since Var is rematerializable, the offset will be added when the lea is |
6058 // emitted. | 6037 // emitted. |
6059 constexpr Constant *NoOffset = nullptr; | 6038 constexpr Constant *NoOffset = nullptr; |
6060 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset); | 6039 auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset); |
6061 _lea(NewVar, Mem); | 6040 _lea(NewVar, Mem); |
6062 From = NewVar; | 6041 From = NewVar; |
6063 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 6042 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
6064 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || | 6043 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || |
6065 MustRematerialize) { | 6044 MustRematerialize) { |
6066 From = copyToReg(From, RegNum); | 6045 From = copyToReg(From, RegNum); |
6067 } | 6046 } |
6068 return From; | 6047 return From; |
6069 } | 6048 } |
6070 llvm_unreachable("Unhandled operand kind in legalize()"); | 6049 llvm_unreachable("Unhandled operand kind in legalize()"); |
6071 return From; | 6050 return From; |
6072 } | 6051 } |
6073 | 6052 |
6074 /// Provide a trivial wrapper to legalize() for this common usage. | 6053 /// Provide a trivial wrapper to legalize() for this common usage. |
6075 template <class Machine> | 6054 template <typename TraitsType> |
6076 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { | 6055 Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From, |
6056 int32_t RegNum) { | |
6077 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 6057 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
6078 } | 6058 } |
6079 | 6059 |
6080 /// Legalize undef values to concrete values. | 6060 /// Legalize undef values to concrete values. |
6081 template <class Machine> | 6061 template <typename TraitsType> |
6082 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { | 6062 Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From, |
6063 int32_t RegNum) { | |
6083 Type Ty = From->getType(); | 6064 Type Ty = From->getType(); |
6084 if (llvm::isa<ConstantUndef>(From)) { | 6065 if (llvm::isa<ConstantUndef>(From)) { |
6085 // Lower undefs to zero. Another option is to lower undefs to an | 6066 // Lower undefs to zero. Another option is to lower undefs to an |
6086 // uninitialized register; however, using an uninitialized register results | 6067 // uninitialized register; however, using an uninitialized register results |
6087 // in less predictable code. | 6068 // in less predictable code. |
6088 // | 6069 // |
6089 // If in the future the implementation is changed to lower undef values to | 6070 // If in the future the implementation is changed to lower undef values to |
6090 // uninitialized registers, a FakeDef will be needed: | 6071 // uninitialized registers, a FakeDef will be needed: |
6091 // Context.insert<InstFakeDef>(Reg); | 6072 // Context.insert<InstFakeDef>(Reg); |
6092 // This is in order to ensure that the live range of Reg is not | 6073 // This is in order to ensure that the live range of Reg is not |
6093 // overestimated. If the constant being lowered is a 64 bit value, then | 6074 // overestimated. If the constant being lowered is a 64 bit value, then |
6094 // the result should be split and the lo and hi components will need to go | 6075 // the result should be split and the lo and hi components will need to go |
6095 // in uninitialized registers. | 6076 // in uninitialized registers. |
6096 if (isVectorType(Ty)) | 6077 if (isVectorType(Ty)) |
6097 return makeVectorOfZeros(Ty, RegNum); | 6078 return makeVectorOfZeros(Ty, RegNum); |
6098 return Ctx->getConstantZero(Ty); | 6079 return Ctx->getConstantZero(Ty); |
6099 } | 6080 } |
6100 return From; | 6081 return From; |
6101 } | 6082 } |
6102 | 6083 |
6103 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical | 6084 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
6104 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be | 6085 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
6105 /// copied into a physical register. (Actually, either Src0 or Src1 can be | 6086 /// copied into a physical register. (Actually, either Src0 or Src1 can be |
6106 /// chosen for the physical register, but unfortunately we have to commit to one | 6087 /// chosen for the physical register, but unfortunately we have to commit to one |
6107 /// or the other before register allocation.) | 6088 /// or the other before register allocation.) |
6108 template <class Machine> | 6089 template <typename TraitsType> |
6109 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 6090 Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0, |
6110 Operand *Src1) { | 6091 Operand *Src1) { |
6111 bool IsSrc1ImmOrReg = false; | 6092 bool IsSrc1ImmOrReg = false; |
6112 if (llvm::isa<Constant>(Src1)) { | 6093 if (llvm::isa<Constant>(Src1)) { |
6113 IsSrc1ImmOrReg = true; | 6094 IsSrc1ImmOrReg = true; |
6114 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { | 6095 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { |
6115 if (Var->hasReg()) | 6096 if (Var->hasReg()) |
6116 IsSrc1ImmOrReg = true; | 6097 IsSrc1ImmOrReg = true; |
6117 } | 6098 } |
6118 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 6099 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
6119 } | 6100 } |
6120 | 6101 |
6121 template <class Machine> | 6102 template <typename TraitsType> |
6122 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 6103 typename TargetX86Base<TraitsType>::X86OperandMem * |
6123 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, | 6104 TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty, |
6124 bool DoLegalize) { | 6105 bool DoLegalize) { |
6125 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); | 6106 auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd); |
6126 // It may be the case that address mode optimization already creates an | 6107 // It may be the case that address mode optimization already creates an |
6127 // Traits::X86OperandMem, so in that case it wouldn't need another level of | 6108 // X86OperandMem, so in that case it wouldn't need another level of |
6128 // transformation. | 6109 // transformation. |
6129 if (!Mem) { | 6110 if (!Mem) { |
6130 auto *Base = llvm::dyn_cast<Variable>(Opnd); | 6111 auto *Base = llvm::dyn_cast<Variable>(Opnd); |
6131 auto *Offset = llvm::dyn_cast<Constant>(Opnd); | 6112 auto *Offset = llvm::dyn_cast<Constant>(Opnd); |
6132 assert(Base || Offset); | 6113 assert(Base || Offset); |
6133 if (Offset) { | 6114 if (Offset) { |
6134 // During memory operand building, we do not blind or pool the constant | 6115 // During memory operand building, we do not blind or pool the constant |
6135 // offset, we will work on the whole memory operand later as one entity | 6116 // offset, we will work on the whole memory operand later as one entity |
6136 // later, this save one instruction. By turning blinding and pooling off, | 6117 // later, this save one instruction. By turning blinding and pooling off, |
6137 // we guarantee legalize(Offset) will return a Constant*. | 6118 // we guarantee legalize(Offset) will return a Constant*. |
6138 { | 6119 { |
6139 BoolFlagSaver B(RandomizationPoolingPaused, true); | 6120 BoolFlagSaver B(RandomizationPoolingPaused, true); |
6140 | 6121 |
6141 Offset = llvm::cast<Constant>(legalize(Offset)); | 6122 Offset = llvm::cast<Constant>(legalize(Offset)); |
6142 } | 6123 } |
6143 | 6124 |
6144 assert(llvm::isa<ConstantInteger32>(Offset) || | 6125 assert(llvm::isa<ConstantInteger32>(Offset) || |
6145 llvm::isa<ConstantRelocatable>(Offset)); | 6126 llvm::isa<ConstantRelocatable>(Offset)); |
6146 } | 6127 } |
6147 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 6128 Mem = X86OperandMem::create(Func, Ty, Base, Offset); |
6148 } | 6129 } |
6149 // Do legalization, which contains randomization/pooling or do | 6130 // Do legalization, which contains randomization/pooling or do |
6150 // randomization/pooling. | 6131 // randomization/pooling. |
6151 return llvm::cast<typename Traits::X86OperandMem>( | 6132 return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem) |
6152 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 6133 : randomizeOrPoolImmediate(Mem)); |
6153 } | 6134 } |
6154 | 6135 |
6155 template <class Machine> | 6136 template <typename TraitsType> |
6156 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | 6137 Variable *TargetX86Base<TraitsType>::makeReg(Type Type, int32_t RegNum) { |
6157 // There aren't any 64-bit integer registers for x86-32. | 6138 // There aren't any 64-bit integer registers for x86-32. |
6158 assert(Traits::Is64Bit || Type != IceType_i64); | 6139 assert(Traits::Is64Bit || Type != IceType_i64); |
6159 Variable *Reg = Func->makeVariable(Type); | 6140 Variable *Reg = Func->makeVariable(Type); |
6160 if (RegNum == Variable::NoRegister) | 6141 if (RegNum == Variable::NoRegister) |
6161 Reg->setMustHaveReg(); | 6142 Reg->setMustHaveReg(); |
6162 else | 6143 else |
6163 Reg->setRegNum(RegNum); | 6144 Reg->setRegNum(RegNum); |
6164 return Reg; | 6145 return Reg; |
6165 } | 6146 } |
6166 | 6147 |
6167 template <class Machine> | 6148 template <typename TraitsType> |
6168 const Type TargetX86Base<Machine>::TypeForSize[] = { | 6149 const Type TargetX86Base<TraitsType>::TypeForSize[] = { |
6169 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; | 6150 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; |
6170 template <class Machine> | 6151 template <typename TraitsType> |
6171 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, | 6152 Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size, |
6172 uint32_t MaxSize) { | 6153 uint32_t MaxSize) { |
6173 assert(Size != 0); | 6154 assert(Size != 0); |
6174 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6155 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
6175 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6156 uint32_t MaxIndex = MaxSize == NoSizeLimit |
6176 ? llvm::array_lengthof(TypeForSize) - 1 | 6157 ? llvm::array_lengthof(TypeForSize) - 1 |
6177 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6158 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
6178 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6159 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
6179 } | 6160 } |
6180 | 6161 |
6181 template <class Machine> | 6162 template <typename TraitsType> |
6182 Type TargetX86Base<Machine>::firstTypeThatFitsSize(uint32_t Size, | 6163 Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size, |
6183 uint32_t MaxSize) { | 6164 uint32_t MaxSize) { |
6184 assert(Size != 0); | 6165 assert(Size != 0); |
6185 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6166 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
6186 if (!llvm::isPowerOf2_32(Size)) | 6167 if (!llvm::isPowerOf2_32(Size)) |
6187 ++TyIndex; | 6168 ++TyIndex; |
6188 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6169 uint32_t MaxIndex = MaxSize == NoSizeLimit |
6189 ? llvm::array_lengthof(TypeForSize) - 1 | 6170 ? llvm::array_lengthof(TypeForSize) - 1 |
6190 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6171 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
6191 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6172 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
6192 } | 6173 } |
6193 | 6174 |
6194 template <class Machine> void TargetX86Base<Machine>::postLower() { | 6175 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() { |
6195 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 6176 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
6196 return; | 6177 return; |
6197 markRedefinitions(); | 6178 markRedefinitions(); |
6198 Context.availabilityUpdate(); | 6179 Context.availabilityUpdate(); |
6199 } | 6180 } |
6200 | 6181 |
6201 template <class Machine> | 6182 template <typename TraitsType> |
6202 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | 6183 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation( |
6203 llvm::SmallVectorImpl<int32_t> &Permutation, | 6184 llvm::SmallVectorImpl<int32_t> &Permutation, |
6204 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { | 6185 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { |
6205 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation, | 6186 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation, |
6206 ExcludeRegisters, Salt); | 6187 ExcludeRegisters, Salt); |
6207 } | 6188 } |
6208 | 6189 |
6209 template <class Machine> | 6190 template <typename TraitsType> |
6210 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | 6191 void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const { |
6211 if (!BuildDefs::dump()) | 6192 if (!BuildDefs::dump()) |
6212 return; | 6193 return; |
6213 Ostream &Str = Ctx->getStrEmit(); | 6194 Ostream &Str = Ctx->getStrEmit(); |
6214 Str << getConstantPrefix() << C->getValue(); | 6195 Str << getConstantPrefix() << C->getValue(); |
6215 } | 6196 } |
6216 | 6197 |
6217 template <class Machine> | 6198 template <typename TraitsType> |
6218 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const { | 6199 void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const { |
6219 if (!Traits::Is64Bit) { | 6200 if (!Traits::Is64Bit) { |
6220 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 6201 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); |
6221 } else { | 6202 } else { |
6222 if (!BuildDefs::dump()) | 6203 if (!BuildDefs::dump()) |
6223 return; | 6204 return; |
6224 Ostream &Str = Ctx->getStrEmit(); | 6205 Ostream &Str = Ctx->getStrEmit(); |
6225 Str << getConstantPrefix() << C->getValue(); | 6206 Str << getConstantPrefix() << C->getValue(); |
6226 } | 6207 } |
6227 } | 6208 } |
6228 | 6209 |
6229 template <class Machine> | 6210 template <typename TraitsType> |
6230 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | 6211 void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const { |
6231 if (!BuildDefs::dump()) | 6212 if (!BuildDefs::dump()) |
6232 return; | 6213 return; |
6233 Ostream &Str = Ctx->getStrEmit(); | 6214 Ostream &Str = Ctx->getStrEmit(); |
6234 C->emitPoolLabel(Str, Ctx); | 6215 C->emitPoolLabel(Str, Ctx); |
6235 } | 6216 } |
6236 | 6217 |
6237 template <class Machine> | 6218 template <typename TraitsType> |
6238 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const { | 6219 void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const { |
6239 if (!BuildDefs::dump()) | 6220 if (!BuildDefs::dump()) |
6240 return; | 6221 return; |
6241 Ostream &Str = Ctx->getStrEmit(); | 6222 Ostream &Str = Ctx->getStrEmit(); |
6242 C->emitPoolLabel(Str, Ctx); | 6223 C->emitPoolLabel(Str, Ctx); |
6243 } | 6224 } |
6244 | 6225 |
6245 template <class Machine> | 6226 template <typename TraitsType> |
6246 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | 6227 void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const { |
6247 llvm::report_fatal_error("undef value encountered by emitter."); | 6228 llvm::report_fatal_error("undef value encountered by emitter."); |
6248 } | 6229 } |
6249 | 6230 |
6250 /// Randomize or pool an Immediate. | 6231 /// Randomize or pool an Immediate. |
6251 template <class Machine> | 6232 template <typename TraitsType> |
6252 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 6233 Operand * |
6253 int32_t RegNum) { | 6234 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate, |
6235 int32_t RegNum) { | |
6254 assert(llvm::isa<ConstantInteger32>(Immediate) || | 6236 assert(llvm::isa<ConstantInteger32>(Immediate) || |
6255 llvm::isa<ConstantRelocatable>(Immediate)); | 6237 llvm::isa<ConstantRelocatable>(Immediate)); |
6256 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 6238 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
6257 RandomizationPoolingPaused == true) { | 6239 RandomizationPoolingPaused == true) { |
6258 // Immediates randomization/pooling off or paused | 6240 // Immediates randomization/pooling off or paused |
6259 return Immediate; | 6241 return Immediate; |
6260 } | 6242 } |
6261 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { | 6243 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { |
6262 Ctx->statsUpdateRPImms(); | 6244 Ctx->statsUpdateRPImms(); |
6263 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 6245 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
6264 RPI_Randomize) { | 6246 RPI_Randomize) { |
6265 // blind the constant | 6247 // blind the constant |
6266 // FROM: | 6248 // FROM: |
6267 // imm | 6249 // imm |
6268 // TO: | 6250 // TO: |
6269 // insert: mov imm+cookie, Reg | 6251 // insert: mov imm+cookie, Reg |
6270 // insert: lea -cookie[Reg], Reg | 6252 // insert: lea -cookie[Reg], Reg |
6271 // => Reg | 6253 // => Reg |
6272 // If we have already assigned a phy register, we must come from | 6254 // If we have already assigned a phy register, we must come from |
6273 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 6255 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
6274 // assigned register as this assignment is that start of its use-def | 6256 // assigned register as this assignment is that start of its use-def |
6275 // chain. So we add RegNum argument here. Note we use 'lea' instruction | 6257 // chain. So we add RegNum argument here. Note we use 'lea' instruction |
6276 // instead of 'xor' to avoid affecting the flags. | 6258 // instead of 'xor' to avoid affecting the flags. |
6277 Variable *Reg = makeReg(IceType_i32, RegNum); | 6259 Variable *Reg = makeReg(IceType_i32, RegNum); |
6278 auto *Integer = llvm::cast<ConstantInteger32>(Immediate); | 6260 auto *Integer = llvm::cast<ConstantInteger32>(Immediate); |
6279 uint32_t Value = Integer->getValue(); | 6261 uint32_t Value = Integer->getValue(); |
6280 uint32_t Cookie = Func->getConstantBlindingCookie(); | 6262 uint32_t Cookie = Func->getConstantBlindingCookie(); |
6281 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); | 6263 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); |
6282 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); | 6264 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); |
6283 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, | 6265 _lea(Reg, |
6284 nullptr, 0)); | 6266 X86OperandMem::create(Func, IceType_i32, Reg, Offset, nullptr, 0)); |
6285 if (Immediate->getType() != IceType_i32) { | 6267 if (Immediate->getType() != IceType_i32) { |
6286 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); | 6268 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); |
6287 _mov(TruncReg, Reg); | 6269 _mov(TruncReg, Reg); |
6288 return TruncReg; | 6270 return TruncReg; |
6289 } | 6271 } |
6290 return Reg; | 6272 return Reg; |
6291 } | 6273 } |
6292 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 6274 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
6293 // pool the constant | 6275 // pool the constant |
6294 // FROM: | 6276 // FROM: |
6295 // imm | 6277 // imm |
6296 // TO: | 6278 // TO: |
6297 // insert: mov $label, Reg | 6279 // insert: mov $label, Reg |
6298 // => Reg | 6280 // => Reg |
6299 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 6281 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
6300 Immediate->setShouldBePooled(true); | 6282 Immediate->setShouldBePooled(true); |
6301 // if we have already assigned a phy register, we must come from | 6283 // if we have already assigned a phy register, we must come from |
6302 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 6284 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
6303 // assigned register as this assignment is that start of its use-def | 6285 // assigned register as this assignment is that start of its use-def |
6304 // chain. So we add RegNum argument here. | 6286 // chain. So we add RegNum argument here. |
6305 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 6287 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
6306 IceString Label; | 6288 IceString Label; |
6307 llvm::raw_string_ostream Label_stream(Label); | 6289 llvm::raw_string_ostream Label_stream(Label); |
6308 Immediate->emitPoolLabel(Label_stream, Ctx); | 6290 Immediate->emitPoolLabel(Label_stream, Ctx); |
6309 constexpr RelocOffsetT Offset = 0; | 6291 constexpr RelocOffsetT Offset = 0; |
6310 constexpr bool SuppressMangling = true; | 6292 constexpr bool SuppressMangling = true; |
6311 Constant *Symbol = | 6293 Constant *Symbol = |
6312 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 6294 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
6313 typename Traits::X86OperandMem *MemOperand = | 6295 X86OperandMem *MemOperand = |
6314 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, | 6296 X86OperandMem::create(Func, Immediate->getType(), nullptr, Symbol); |
6315 Symbol); | |
6316 _mov(Reg, MemOperand); | 6297 _mov(Reg, MemOperand); |
6317 return Reg; | 6298 return Reg; |
6318 } | 6299 } |
6319 assert("Unsupported -randomize-pool-immediates option" && false); | 6300 assert("Unsupported -randomize-pool-immediates option" && false); |
6320 } | 6301 } |
6321 // the constant Immediate is not eligible for blinding/pooling | 6302 // the constant Immediate is not eligible for blinding/pooling |
6322 return Immediate; | 6303 return Immediate; |
6323 } | 6304 } |
6324 | 6305 |
6325 template <class Machine> | 6306 template <typename TraitsType> |
6326 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 6307 typename TargetX86Base<TraitsType>::X86OperandMem * |
6327 TargetX86Base<Machine>::randomizeOrPoolImmediate( | 6308 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand, |
6328 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { | 6309 int32_t RegNum) { |
6329 assert(MemOperand); | 6310 assert(MemOperand); |
6330 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 6311 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
6331 RandomizationPoolingPaused == true) { | 6312 RandomizationPoolingPaused == true) { |
6332 // immediates randomization/pooling is turned off | 6313 // immediates randomization/pooling is turned off |
6333 return MemOperand; | 6314 return MemOperand; |
6334 } | 6315 } |
6335 | 6316 |
6336 // If this memory operand is already a randomized one, we do not randomize it | 6317 // If this memory operand is already a randomized one, we do not randomize it |
6337 // again. | 6318 // again. |
6338 if (MemOperand->getRandomized()) | 6319 if (MemOperand->getRandomized()) |
(...skipping 13 matching lines...) Expand all Loading... | |
6352 // => -cookie[RegTemp, index, shift] | 6333 // => -cookie[RegTemp, index, shift] |
6353 uint32_t Value = | 6334 uint32_t Value = |
6354 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) | 6335 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) |
6355 ->getValue(); | 6336 ->getValue(); |
6356 uint32_t Cookie = Func->getConstantBlindingCookie(); | 6337 uint32_t Cookie = Func->getConstantBlindingCookie(); |
6357 Constant *Mask1 = Ctx->getConstantInt( | 6338 Constant *Mask1 = Ctx->getConstantInt( |
6358 MemOperand->getOffset()->getType(), Cookie + Value); | 6339 MemOperand->getOffset()->getType(), Cookie + Value); |
6359 Constant *Mask2 = | 6340 Constant *Mask2 = |
6360 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 6341 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
6361 | 6342 |
6362 typename Traits::X86OperandMem *TempMemOperand = | 6343 X86OperandMem *TempMemOperand = X86OperandMem::create( |
6363 Traits::X86OperandMem::create(Func, MemOperand->getType(), | 6344 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); |
6364 MemOperand->getBase(), Mask1); | |
6365 // If we have already assigned a physical register, we must come from | 6345 // If we have already assigned a physical register, we must come from |
6366 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 6346 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
6367 // the assigned register as this assignment is that start of its | 6347 // the assigned register as this assignment is that start of its |
6368 // use-def chain. So we add RegNum argument here. | 6348 // use-def chain. So we add RegNum argument here. |
6369 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 6349 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
6370 _lea(RegTemp, TempMemOperand); | 6350 _lea(RegTemp, TempMemOperand); |
6371 | 6351 |
6372 typename Traits::X86OperandMem *NewMemOperand = | 6352 X86OperandMem *NewMemOperand = X86OperandMem::create( |
6373 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, | 6353 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(), |
6374 Mask2, MemOperand->getIndex(), | 6354 MemOperand->getShift(), MemOperand->getSegmentRegister()); |
6375 MemOperand->getShift(), | |
6376 MemOperand->getSegmentRegister()); | |
6377 | 6355 |
6378 // Label this memory operand as randomized, so we won't randomize it | 6356 // Label this memory operand as randomized, so we won't randomize it |
6379 // again in case we call legalize() multiple times on this memory | 6357 // again in case we call legalize() multiple times on this memory |
6380 // operand. | 6358 // operand. |
6381 NewMemOperand->setRandomized(true); | 6359 NewMemOperand->setRandomized(true); |
6382 return NewMemOperand; | 6360 return NewMemOperand; |
6383 } | 6361 } |
6384 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 6362 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
6385 // pool the constant offset | 6363 // pool the constant offset |
6386 // FROM: | 6364 // FROM: |
(...skipping 13 matching lines...) Expand all Loading... | |
6400 return MemOperand; | 6378 return MemOperand; |
6401 Variable *RegTemp = makeReg(IceType_i32); | 6379 Variable *RegTemp = makeReg(IceType_i32); |
6402 IceString Label; | 6380 IceString Label; |
6403 llvm::raw_string_ostream Label_stream(Label); | 6381 llvm::raw_string_ostream Label_stream(Label); |
6404 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 6382 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); |
6405 MemOperand->getOffset()->setShouldBePooled(true); | 6383 MemOperand->getOffset()->setShouldBePooled(true); |
6406 constexpr RelocOffsetT SymOffset = 0; | 6384 constexpr RelocOffsetT SymOffset = 0; |
6407 constexpr bool SuppressMangling = true; | 6385 constexpr bool SuppressMangling = true; |
6408 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 6386 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
6409 SuppressMangling); | 6387 SuppressMangling); |
6410 typename Traits::X86OperandMem *SymbolOperand = | 6388 X86OperandMem *SymbolOperand = X86OperandMem::create( |
6411 Traits::X86OperandMem::create( | 6389 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); |
6412 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | |
6413 _mov(RegTemp, SymbolOperand); | 6390 _mov(RegTemp, SymbolOperand); |
6414 // If we have a base variable here, we should add the lea instruction | 6391 // If we have a base variable here, we should add the lea instruction |
6415 // to add the value of the base variable to RegTemp. If there is no | 6392 // to add the value of the base variable to RegTemp. If there is no |
6416 // base variable, we won't need this lea instruction. | 6393 // base variable, we won't need this lea instruction. |
6417 if (MemOperand->getBase()) { | 6394 if (MemOperand->getBase()) { |
6418 typename Traits::X86OperandMem *CalculateOperand = | 6395 X86OperandMem *CalculateOperand = X86OperandMem::create( |
6419 Traits::X86OperandMem::create( | 6396 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, |
6420 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, | 6397 RegTemp, 0, MemOperand->getSegmentRegister()); |
6421 RegTemp, 0, MemOperand->getSegmentRegister()); | |
6422 _lea(RegTemp, CalculateOperand); | 6398 _lea(RegTemp, CalculateOperand); |
6423 } | 6399 } |
6424 typename Traits::X86OperandMem *NewMemOperand = | 6400 X86OperandMem *NewMemOperand = X86OperandMem::create( |
6425 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, | 6401 Func, MemOperand->getType(), RegTemp, nullptr, |
6426 nullptr, MemOperand->getIndex(), | 6402 MemOperand->getIndex(), MemOperand->getShift(), |
6427 MemOperand->getShift(), | 6403 MemOperand->getSegmentRegister()); |
6428 MemOperand->getSegmentRegister()); | |
6429 return NewMemOperand; | 6404 return NewMemOperand; |
6430 } | 6405 } |
6431 assert("Unsupported -randomize-pool-immediates option" && false); | 6406 assert("Unsupported -randomize-pool-immediates option" && false); |
6432 } | 6407 } |
6433 } | 6408 } |
6434 // the offset is not eligible for blinding or pooling, return the original | 6409 // the offset is not eligible for blinding or pooling, return the original |
6435 // mem operand | 6410 // mem operand |
6436 return MemOperand; | 6411 return MemOperand; |
6437 } | 6412 } |
6438 | 6413 |
6439 } // end of namespace X86Internal | 6414 } // end of namespace X86NAMESPACE |
6440 } // end of namespace Ice | 6415 } // end of namespace Ice |
6441 | 6416 |
6442 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6417 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |