Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(58)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1548363002: Subzero. Code organization. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// \brief Implements the TargetLoweringX86Base class, which consists almost 11 /// \brief Implements the TargetLoweringX86Base class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction. 12 /// entirely of the lowering sequence for each high-level instruction.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstVarIter.h"
#include "IceInstX86Base.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceUtils.h"

#include "llvm/Support/MathExtras.h"

#include <stack>
33 34
34 namespace Ice { 35 namespace Ice {
35 namespace X86Internal { 36 namespace X86NAMESPACE {
36 37
/// A helper class to ease the settings of RandomizationPoolingPause to disable
/// constant blinding or pooling for some translation phases.
///
/// RAII-style: the constructor saves the flag's current value and overwrites
/// it with NewValue; the destructor restores the saved value when the saver
/// goes out of scope.
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  /// Records the current value of F, then sets F to NewValue.
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  /// Restores the flag to the value it had at construction time.
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue; ///< Flag value captured at construction.
  bool &Flag;          ///< The flag being temporarily overridden.
};
52 53
53 template <class MachineTraits> class BoolFoldingEntry { 54 template <typename Traits> class BoolFoldingEntry {
54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete;
55 56
56 public: 57 public:
57 BoolFoldingEntry() = default; 58 BoolFoldingEntry() = default;
58 explicit BoolFoldingEntry(Inst *I); 59 explicit BoolFoldingEntry(Inst *I);
59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
60 /// Instr is the instruction producing the i1-type variable of interest. 61 /// Instr is the instruction producing the i1-type variable of interest.
61 Inst *Instr = nullptr; 62 Inst *Instr = nullptr;
62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
63 bool IsComplex = false; 64 bool IsComplex = false;
64 /// IsLiveOut is initialized conservatively to true, and is set to false when 65 /// IsLiveOut is initialized conservatively to true, and is set to false when
65 /// we encounter an instruction that ends Var's live range. We disable the 66 /// we encounter an instruction that ends Var's live range. We disable the
66 /// folding optimization when Var is live beyond this basic block. Note that 67 /// folding optimization when Var is live beyond this basic block. Note that
67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
68 /// always be true and the folding optimization will never be performed. 69 /// always be true and the folding optimization will never be performed.
69 bool IsLiveOut = true; 70 bool IsLiveOut = true;
70 // NumUses counts the number of times Var is used as a source operand in the 71 // NumUses counts the number of times Var is used as a source operand in the
71 // basic block. If IsComplex is true and there is more than one use of Var, 72 // basic block. If IsComplex is true and there is more than one use of Var,
72 // then the folding optimization is disabled for Var. 73 // then the folding optimization is disabled for Var.
73 uint32_t NumUses = 0; 74 uint32_t NumUses = 0;
74 }; 75 };
75 76
76 template <class MachineTraits> class BoolFolding { 77 template <typename Traits> class BoolFolding {
77 public: 78 public:
78 enum BoolFoldingProducerKind { 79 enum BoolFoldingProducerKind {
79 PK_None, 80 PK_None,
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. 81 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
81 PK_Icmp32, 82 PK_Icmp32,
82 PK_Icmp64, 83 PK_Icmp64,
83 PK_Fcmp, 84 PK_Fcmp,
84 PK_Trunc, 85 PK_Trunc,
85 PK_Arith // A flag-setting arithmetic instruction. 86 PK_Arith // A flag-setting arithmetic instruction.
86 }; 87 };
(...skipping 19 matching lines...) Expand all
106 void dump(const Cfg *Func) const; 107 void dump(const Cfg *Func) const;
107 108
108 private: 109 private:
109 /// Returns true if Producers contains a valid entry for the given VarNum. 110 /// Returns true if Producers contains a valid entry for the given VarNum.
110 bool containsValid(SizeT VarNum) const { 111 bool containsValid(SizeT VarNum) const {
111 auto Element = Producers.find(VarNum); 112 auto Element = Producers.find(VarNum);
112 return Element != Producers.end() && Element->second.Instr != nullptr; 113 return Element != Producers.end() && Element->second.Instr != nullptr;
113 } 114 }
114 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } 115 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
115 /// Producers maps Variable::Number to a BoolFoldingEntry. 116 /// Producers maps Variable::Number to a BoolFoldingEntry.
116 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; 117 std::unordered_map<SizeT, BoolFoldingEntry<Traits>> Producers;
117 }; 118 };
118 119
119 template <class MachineTraits> 120 template <typename Traits>
120 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 121 BoolFoldingEntry<Traits>::BoolFoldingEntry(Inst *I)
121 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 122 : Instr(I), IsComplex(BoolFolding<Traits>::hasComplexLowering(I)) {}
122 123
123 template <class MachineTraits> 124 template <typename Traits>
124 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 125 typename BoolFolding<Traits>::BoolFoldingProducerKind
125 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 126 BoolFolding<Traits>::getProducerKind(const Inst *Instr) {
126 if (llvm::isa<InstIcmp>(Instr)) { 127 if (llvm::isa<InstIcmp>(Instr)) {
127 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) 128 if (Traits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
128 return PK_Icmp32; 129 return PK_Icmp32;
129 return PK_Icmp64; 130 return PK_Icmp64;
130 } 131 }
131 if (llvm::isa<InstFcmp>(Instr)) 132 if (llvm::isa<InstFcmp>(Instr))
132 return PK_Fcmp; 133 return PK_Fcmp;
133 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { 134 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
134 if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { 135 if (Traits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
135 switch (Arith->getOp()) { 136 switch (Arith->getOp()) {
136 default: 137 default:
137 return PK_None; 138 return PK_None;
138 case InstArithmetic::And: 139 case InstArithmetic::And:
139 case InstArithmetic::Or: 140 case InstArithmetic::Or:
140 return PK_Arith; 141 return PK_Arith;
141 } 142 }
142 } 143 }
143 } 144 }
144 return PK_None; // TODO(stichnot): remove this 145 return PK_None; // TODO(stichnot): remove this
145 146
146 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 147 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
147 switch (Cast->getCastKind()) { 148 switch (Cast->getCastKind()) {
148 default: 149 default:
149 return PK_None; 150 return PK_None;
150 case InstCast::Trunc: 151 case InstCast::Trunc:
151 return PK_Trunc; 152 return PK_Trunc;
152 } 153 }
153 } 154 }
154 return PK_None; 155 return PK_None;
155 } 156 }
156 157
157 template <class MachineTraits> 158 template <typename Traits>
158 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind 159 typename BoolFolding<Traits>::BoolFoldingConsumerKind
159 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { 160 BoolFolding<Traits>::getConsumerKind(const Inst *Instr) {
160 if (llvm::isa<InstBr>(Instr)) 161 if (llvm::isa<InstBr>(Instr))
161 return CK_Br; 162 return CK_Br;
162 if (llvm::isa<InstSelect>(Instr)) 163 if (llvm::isa<InstSelect>(Instr))
163 return CK_Select; 164 return CK_Select;
164 return CK_None; // TODO(stichnot): remove this 165 return CK_None; // TODO(stichnot): remove this
165 166
166 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 167 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
167 switch (Cast->getCastKind()) { 168 switch (Cast->getCastKind()) {
168 default: 169 default:
169 return CK_None; 170 return CK_None;
170 case InstCast::Sext: 171 case InstCast::Sext:
171 return CK_Sext; 172 return CK_Sext;
172 case InstCast::Zext: 173 case InstCast::Zext:
173 return CK_Zext; 174 return CK_Zext;
174 } 175 }
175 } 176 }
176 return CK_None; 177 return CK_None;
177 } 178 }
178 179
179 /// Returns true if the producing instruction has a "complex" lowering sequence. 180 /// Returns true if the producing instruction has a "complex" lowering sequence.
180 /// This generally means that its lowering sequence requires more than one 181 /// This generally means that its lowering sequence requires more than one
181 /// conditional branch, namely 64-bit integer compares and some floating-point 182 /// conditional branch, namely 64-bit integer compares and some floating-point
182 /// compares. When this is true, and there is more than one consumer, we prefer 183 /// compares. When this is true, and there is more than one consumer, we prefer
183 /// to disable the folding optimization because it minimizes branches. 184 /// to disable the folding optimization because it minimizes branches.
184 template <class MachineTraits> 185 template <typename Traits>
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 186 bool BoolFolding<Traits>::hasComplexLowering(const Inst *Instr) {
186 switch (getProducerKind(Instr)) { 187 switch (getProducerKind(Instr)) {
187 default: 188 default:
188 return false; 189 return false;
189 case PK_Icmp64: 190 case PK_Icmp64:
190 return !MachineTraits::Is64Bit; 191 return !Traits::Is64Bit;
191 case PK_Fcmp: 192 case PK_Fcmp:
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 193 return Traits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
193 .C2 != MachineTraits::Cond::Br_None; 194 Traits::Cond::Br_None;
194 } 195 }
195 } 196 }
196 197
197 template <class MachineTraits> 198 template <typename Traits>
198 bool BoolFolding<MachineTraits>::isValidFolding( 199 bool BoolFolding<Traits>::isValidFolding(
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, 200 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind,
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { 201 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind) {
201 switch (ProducerKind) { 202 switch (ProducerKind) {
202 default: 203 default:
203 return false; 204 return false;
204 case PK_Icmp32: 205 case PK_Icmp32:
205 case PK_Icmp64: 206 case PK_Icmp64:
206 case PK_Fcmp: 207 case PK_Fcmp:
207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); 208 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
208 case PK_Arith: 209 case PK_Arith:
209 return ConsumerKind == CK_Br; 210 return ConsumerKind == CK_Br;
210 } 211 }
211 } 212 }
212 213
213 template <class MachineTraits> 214 template <typename Traits> void BoolFolding<Traits>::init(CfgNode *Node) {
214 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
215 Producers.clear(); 215 Producers.clear();
216 for (Inst &Instr : Node->getInsts()) { 216 for (Inst &Instr : Node->getInsts()) {
217 // Check whether Instr is a valid producer. 217 // Check whether Instr is a valid producer.
218 Variable *Var = Instr.getDest(); 218 Variable *Var = Instr.getDest();
219 if (!Instr.isDeleted() // only consider non-deleted instructions 219 if (!Instr.isDeleted() // only consider non-deleted instructions
220 && Var // only instructions with an actual dest var 220 && Var // only instructions with an actual dest var
221 && Var->getType() == IceType_i1 // only bool-type dest vars 221 && Var->getType() == IceType_i1 // only bool-type dest vars
222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); 223 Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr);
224 } 224 }
225 // Check each src variable against the map. 225 // Check each src variable against the map.
226 FOREACH_VAR_IN_INST(Var, Instr) { 226 FOREACH_VAR_IN_INST(Var, Instr) {
227 SizeT VarNum = Var->getIndex(); 227 SizeT VarNum = Var->getIndex();
228 if (!containsValid(VarNum)) 228 if (!containsValid(VarNum))
229 continue; 229 continue;
230 // All valid consumers use Var as the first source operand 230 // All valid consumers use Var as the first source operand
231 if (IndexOfVarOperandInInst(Var) != 0) { 231 if (IndexOfVarOperandInInst(Var) != 0) {
232 setInvalid(VarNum); 232 setInvalid(VarNum);
233 continue; 233 continue;
234 } 234 }
235 // Consumer instructions must be white-listed 235 // Consumer instructions must be white-listed
236 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind 236 typename BoolFolding<Traits>::BoolFoldingConsumerKind ConsumerKind =
237 ConsumerKind = getConsumerKind(&Instr); 237 getConsumerKind(&Instr);
238 if (ConsumerKind == CK_None) { 238 if (ConsumerKind == CK_None) {
239 setInvalid(VarNum); 239 setInvalid(VarNum);
240 continue; 240 continue;
241 } 241 }
242 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 242 typename BoolFolding<Traits>::BoolFoldingProducerKind ProducerKind =
243 ProducerKind = getProducerKind(Producers[VarNum].Instr); 243 getProducerKind(Producers[VarNum].Instr);
244 if (!isValidFolding(ProducerKind, ConsumerKind)) { 244 if (!isValidFolding(ProducerKind, ConsumerKind)) {
245 setInvalid(VarNum); 245 setInvalid(VarNum);
246 continue; 246 continue;
247 } 247 }
248 // Avoid creating multiple copies of complex producer instructions. 248 // Avoid creating multiple copies of complex producer instructions.
249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { 249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
250 setInvalid(VarNum); 250 setInvalid(VarNum);
251 continue; 251 continue;
252 } 252 }
253 ++Producers[VarNum].NumUses; 253 ++Producers[VarNum].NumUses;
(...skipping 12 matching lines...) Expand all
266 continue; 266 continue;
267 } 267 }
268 // Mark as "dead" rather than outright deleting. This is so that other 268 // Mark as "dead" rather than outright deleting. This is so that other
269 // peephole style optimizations during or before lowering have access to 269 // peephole style optimizations during or before lowering have access to
270 // this instruction in undeleted form. See for example 270 // this instruction in undeleted form. See for example
271 // tryOptimizedCmpxchgCmpBr(). 271 // tryOptimizedCmpxchgCmpBr().
272 I.second.Instr->setDead(); 272 I.second.Instr->setDead();
273 } 273 }
274 } 274 }
275 275
276 template <class MachineTraits> 276 template <typename Traits>
277 const Inst * 277 const Inst *BoolFolding<Traits>::getProducerFor(const Operand *Opnd) const {
278 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
279 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 278 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
280 if (Var == nullptr) 279 if (Var == nullptr)
281 return nullptr; 280 return nullptr;
282 SizeT VarNum = Var->getIndex(); 281 SizeT VarNum = Var->getIndex();
283 auto Element = Producers.find(VarNum); 282 auto Element = Producers.find(VarNum);
284 if (Element == Producers.end()) 283 if (Element == Producers.end())
285 return nullptr; 284 return nullptr;
286 return Element->second.Instr; 285 return Element->second.Instr;
287 } 286 }
288 287
289 template <class MachineTraits> 288 template <typename Traits>
290 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const { 289 void BoolFolding<Traits>::dump(const Cfg *Func) const {
291 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding)) 290 if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
292 return; 291 return;
293 OstreamLocker L(Func->getContext()); 292 OstreamLocker L(Func->getContext());
294 Ostream &Str = Func->getContext()->getStrDump(); 293 Ostream &Str = Func->getContext()->getStrDump();
295 for (auto &I : Producers) { 294 for (auto &I : Producers) {
296 if (I.second.Instr == nullptr) 295 if (I.second.Instr == nullptr)
297 continue; 296 continue;
298 Str << "Found foldable producer:\n "; 297 Str << "Found foldable producer:\n ";
299 I.second.Instr->dump(Func); 298 I.second.Instr->dump(Func);
300 Str << "\n"; 299 Str << "\n";
301 } 300 }
302 } 301 }
303 302
304 template <class Machine> 303 template <typename TraitsType>
305 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) { 304 void TargetX86Base<TraitsType>::initNodeForLowering(CfgNode *Node) {
306 FoldingInfo.init(Node); 305 FoldingInfo.init(Node);
307 FoldingInfo.dump(Func); 306 FoldingInfo.dump(Func);
308 } 307 }
309 308
310 template <class Machine> 309 template <typename TraitsType>
311 TargetX86Base<Machine>::TargetX86Base(Cfg *Func) 310 TargetX86Base<TraitsType>::TargetX86Base(Cfg *Func)
312 : TargetLowering(Func) { 311 : TargetLowering(Func) {
313 static_assert( 312 static_assert(
314 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == 313 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
315 (TargetInstructionSet::X86InstructionSet_End - 314 (TargetInstructionSet::X86InstructionSet_End -
316 TargetInstructionSet::X86InstructionSet_Begin), 315 TargetInstructionSet::X86InstructionSet_Begin),
317 "Traits::InstructionSet range different from TargetInstructionSet"); 316 "Traits::InstructionSet range different from TargetInstructionSet");
318 if (Func->getContext()->getFlags().getTargetInstructionSet() != 317 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
319 TargetInstructionSet::BaseInstructionSet) { 318 TargetInstructionSet::BaseInstructionSet) {
320 InstructionSet = static_cast<typename Traits::InstructionSet>( 319 InstructionSet = static_cast<InstructionSetEnum>(
321 (Func->getContext()->getFlags().getTargetInstructionSet() - 320 (Func->getContext()->getFlags().getTargetInstructionSet() -
322 TargetInstructionSet::X86InstructionSet_Begin) + 321 TargetInstructionSet::X86InstructionSet_Begin) +
323 Traits::InstructionSet::Begin); 322 Traits::InstructionSet::Begin);
324 } 323 }
325 } 324 }
326 325
327 template <class Machine> void TargetX86Base<Machine>::staticInit() { 326 template <typename TraitsType> void TargetX86Base<TraitsType>::staticInit() {
328 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); 327 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
329 } 328 }
330 329
331 template <class Machine> void TargetX86Base<Machine>::translateO2() { 330 template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
332 TimerMarker T(TimerStack::TT_O2, Func); 331 TimerMarker T(TimerStack::TT_O2, Func);
333 332
334 genTargetHelperCalls(); 333 genTargetHelperCalls();
335 Func->dump("After target helper call insertion"); 334 Func->dump("After target helper call insertion");
336 335
337 // Merge Alloca instructions, and lay out the stack. 336 // Merge Alloca instructions, and lay out the stack.
338 static constexpr bool SortAndCombineAllocas = true; 337 static constexpr bool SortAndCombineAllocas = true;
339 Func->processAllocas(SortAndCombineAllocas); 338 Func->processAllocas(SortAndCombineAllocas);
340 Func->dump("After Alloca processing"); 339 Func->dump("After Alloca processing");
341 340
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
447 Func->dump("After branch optimization"); 446 Func->dump("After branch optimization");
448 447
449 // Nop insertion if -nop-insertion is enabled. 448 // Nop insertion if -nop-insertion is enabled.
450 Func->doNopInsertion(); 449 Func->doNopInsertion();
451 450
452 // Mark nodes that require sandbox alignment 451 // Mark nodes that require sandbox alignment
453 if (Ctx->getFlags().getUseSandboxing()) 452 if (Ctx->getFlags().getUseSandboxing())
454 Func->markNodesForSandboxing(); 453 Func->markNodesForSandboxing();
455 } 454 }
456 455
457 template <class Machine> void TargetX86Base<Machine>::translateOm1() { 456 template <typename TraitsType> void TargetX86Base<TraitsType>::translateOm1() {
458 TimerMarker T(TimerStack::TT_Om1, Func); 457 TimerMarker T(TimerStack::TT_Om1, Func);
459 458
460 genTargetHelperCalls(); 459 genTargetHelperCalls();
461 460
462 // Do not merge Alloca instructions, and lay out the stack. 461 // Do not merge Alloca instructions, and lay out the stack.
463 static constexpr bool SortAndCombineAllocas = false; 462 static constexpr bool SortAndCombineAllocas = false;
464 Func->processAllocas(SortAndCombineAllocas); 463 Func->processAllocas(SortAndCombineAllocas);
465 Func->dump("After Alloca processing"); 464 Func->dump("After Alloca processing");
466 465
467 Func->placePhiLoads(); 466 Func->placePhiLoads();
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 case InstArithmetic::Xor: 523 case InstArithmetic::Xor:
525 return true; 524 return true;
526 case InstArithmetic::Shl: 525 case InstArithmetic::Shl:
527 case InstArithmetic::Lshr: 526 case InstArithmetic::Lshr:
528 case InstArithmetic::Ashr: 527 case InstArithmetic::Ashr:
529 return false; // TODO(stichnot): implement 528 return false; // TODO(stichnot): implement
530 return !isI64; 529 return !isI64;
531 } 530 }
532 } 531 }
533 532
534 template <class Machine> 533 template <typename TraitsType>
535 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { 534 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
536 if (A == B) 535 if (A == B)
537 return true; 536 return true;
538 if (auto *MemA = llvm::dyn_cast< 537 if (auto *MemA =
539 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) { 538 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
540 if (auto *MemB = llvm::dyn_cast< 539 A)) {
541 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) { 540 if (auto *MemB =
541 llvm::dyn_cast<typename TargetX86Base<TraitsType>::X86OperandMem>(
542 B)) {
542 return MemA->getBase() == MemB->getBase() && 543 return MemA->getBase() == MemB->getBase() &&
543 MemA->getOffset() == MemB->getOffset() && 544 MemA->getOffset() == MemB->getOffset() &&
544 MemA->getIndex() == MemB->getIndex() && 545 MemA->getIndex() == MemB->getIndex() &&
545 MemA->getShift() == MemB->getShift() && 546 MemA->getShift() == MemB->getShift() &&
546 MemA->getSegmentRegister() == MemB->getSegmentRegister(); 547 MemA->getSegmentRegister() == MemB->getSegmentRegister();
547 } 548 }
548 } 549 }
549 return false; 550 return false;
550 } 551 }
551 552
552 template <class Machine> void TargetX86Base<Machine>::findRMW() { 553 template <typename TraitsType> void TargetX86Base<TraitsType>::findRMW() {
553 Func->dump("Before RMW"); 554 Func->dump("Before RMW");
554 if (Func->isVerbose(IceV_RMW)) 555 if (Func->isVerbose(IceV_RMW))
555 Func->getContext()->lockStr(); 556 Func->getContext()->lockStr();
556 for (CfgNode *Node : Func->getNodes()) { 557 for (CfgNode *Node : Func->getNodes()) {
557 // Walk through the instructions, considering each sequence of 3 558 // Walk through the instructions, considering each sequence of 3
558 // instructions, and look for the particular RMW pattern. Note that this 559 // instructions, and look for the particular RMW pattern. Note that this
559 // search can be "broken" (false negatives) if there are intervening 560 // search can be "broken" (false negatives) if there are intervening
560 // deleted instructions, or intervening instructions that could be safely 561 // deleted instructions, or intervening instructions that could be safely
561 // moved out of the way to reveal an RMW pattern. 562 // moved out of the way to reveal an RMW pattern.
562 auto E = Node->getInsts().end(); 563 auto E = Node->getInsts().end();
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
596 // x's live range, and therefore the RMW instruction will be retained and 597 // x's live range, and therefore the RMW instruction will be retained and
597 // later lowered. On the other hand, if the RMW instruction does not end 598 // later lowered. On the other hand, if the RMW instruction does not end
598 // x's live range, then the Store instruction must still be present, and 599 // x's live range, then the Store instruction must still be present, and
599 // therefore the RMW instruction is ignored during lowering because it is 600 // therefore the RMW instruction is ignored during lowering because it is
600 // redundant with the Store instruction. 601 // redundant with the Store instruction.
601 // 602 //
602 // Note that if "a" has further uses, the RMW transformation may still 603 // Note that if "a" has further uses, the RMW transformation may still
603 // trigger, resulting in two loads and one store, which is worse than the 604 // trigger, resulting in two loads and one store, which is worse than the
604 // original one load and one store. However, this is probably rare, and 605 // original one load and one store. However, this is probably rare, and
605 // caching probably keeps it just as fast. 606 // caching probably keeps it just as fast.
606 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), 607 if (!isSameMemAddressOperand<TraitsType>(Load->getSourceAddress(),
607 Store->getAddr())) 608 Store->getAddr()))
608 continue; 609 continue;
609 Operand *ArithSrcFromLoad = Arith->getSrc(0); 610 Operand *ArithSrcFromLoad = Arith->getSrc(0);
610 Operand *ArithSrcOther = Arith->getSrc(1); 611 Operand *ArithSrcOther = Arith->getSrc(1);
611 if (ArithSrcFromLoad != Load->getDest()) { 612 if (ArithSrcFromLoad != Load->getDest()) {
612 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) 613 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
613 continue; 614 continue;
614 std::swap(ArithSrcFromLoad, ArithSrcOther); 615 std::swap(ArithSrcFromLoad, ArithSrcOther);
615 } 616 }
616 if (Arith->getDest() != Store->getData()) 617 if (Arith->getDest() != Store->getData())
617 continue; 618 continue;
618 if (!canRMW(Arith)) 619 if (!canRMW(Arith))
619 continue; 620 continue;
620 if (Func->isVerbose(IceV_RMW)) { 621 if (Func->isVerbose(IceV_RMW)) {
621 Ostream &Str = Func->getContext()->getStrDump(); 622 Ostream &Str = Func->getContext()->getStrDump();
622 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; 623 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
623 Load->dump(Func); 624 Load->dump(Func);
624 Str << "\n "; 625 Str << "\n ";
625 Arith->dump(Func); 626 Arith->dump(Func);
626 Str << "\n "; 627 Str << "\n ";
627 Store->dump(Func); 628 Store->dump(Func);
628 Str << "\n"; 629 Str << "\n";
629 } 630 }
630 Variable *Beacon = Func->makeVariable(IceType_i32); 631 Variable *Beacon = Func->makeVariable(IceType_i32);
631 Beacon->setMustNotHaveReg(); 632 Beacon->setMustNotHaveReg();
632 Store->setRmwBeacon(Beacon); 633 Store->setRmwBeacon(Beacon);
633 auto *BeaconDef = InstFakeDef::create(Func, Beacon); 634 auto *BeaconDef = InstFakeDef::create(Func, Beacon);
634 Node->getInsts().insert(I3, BeaconDef); 635 Node->getInsts().insert(I3, BeaconDef);
635 auto *RMW = Traits::Insts::FakeRMW::create( 636 auto *RMW = InstX86FakeRMW::create(Func, ArithSrcOther, Store->getAddr(),
636 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); 637 Beacon, Arith->getOp());
637 Node->getInsts().insert(I3, RMW); 638 Node->getInsts().insert(I3, RMW);
638 } 639 }
639 } 640 }
640 if (Func->isVerbose(IceV_RMW)) 641 if (Func->isVerbose(IceV_RMW))
641 Func->getContext()->unlockStr(); 642 Func->getContext()->unlockStr();
642 } 643 }
643 644
644 // Converts a ConstantInteger32 operand into its constant value, or 645 // Converts a ConstantInteger32 operand into its constant value, or
645 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 646 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
646 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 647 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
(...skipping 12 matching lines...) Expand all
659 Src0 = LoadSrc; 660 Src0 = LoadSrc;
660 return true; 661 return true;
661 } 662 }
662 if (Src0 != LoadDest && Src1 == LoadDest) { 663 if (Src0 != LoadDest && Src1 == LoadDest) {
663 Src1 = LoadSrc; 664 Src1 = LoadSrc;
664 return true; 665 return true;
665 } 666 }
666 return false; 667 return false;
667 } 668 }
668 669
669 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { 670 template <typename TraitsType> void TargetX86Base<TraitsType>::doLoadOpt() {
670 for (CfgNode *Node : Func->getNodes()) { 671 for (CfgNode *Node : Func->getNodes()) {
671 Context.init(Node); 672 Context.init(Node);
672 while (!Context.atEnd()) { 673 while (!Context.atEnd()) {
673 Variable *LoadDest = nullptr; 674 Variable *LoadDest = nullptr;
674 Operand *LoadSrc = nullptr; 675 Operand *LoadSrc = nullptr;
675 Inst *CurInst = Context.getCur(); 676 Inst *CurInst = Context.getCur();
676 Inst *Next = Context.getNextInst(); 677 Inst *Next = Context.getNextInst();
677 // Determine whether the current instruction is a Load instruction or 678 // Determine whether the current instruction is a Load instruction or
678 // equivalent. 679 // equivalent.
679 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 680 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
748 NewInst->spliceLivenessInfo(Next, CurInst); 749 NewInst->spliceLivenessInfo(Next, CurInst);
749 } 750 }
750 } 751 }
751 Context.advanceCur(); 752 Context.advanceCur();
752 Context.advanceNext(); 753 Context.advanceNext();
753 } 754 }
754 } 755 }
755 Func->dump("After load optimization"); 756 Func->dump("After load optimization");
756 } 757 }
757 758
758 template <class Machine> 759 template <typename TraitsType>
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 760 bool TargetX86Base<TraitsType>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 761 if (auto *Br = llvm::dyn_cast<InstX86Br>(I)) {
761 return Br->optimizeBranch(NextNode); 762 return Br->optimizeBranch(NextNode);
762 } 763 }
763 return false; 764 return false;
764 } 765 }
765 766
766 template <class Machine> 767 template <typename TraitsType>
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 768 Variable *TargetX86Base<TraitsType>::getPhysicalRegister(SizeT RegNum,
769 Type Ty) {
768 if (Ty == IceType_void) 770 if (Ty == IceType_void)
769 Ty = IceType_i32; 771 Ty = IceType_i32;
770 if (PhysicalRegisters[Ty].empty()) 772 if (PhysicalRegisters[Ty].empty())
771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 773 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
772 assert(RegNum < PhysicalRegisters[Ty].size()); 774 assert(RegNum < PhysicalRegisters[Ty].size());
773 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 775 Variable *Reg = PhysicalRegisters[Ty][RegNum];
774 if (Reg == nullptr) { 776 if (Reg == nullptr) {
775 Reg = Func->makeVariable(Ty); 777 Reg = Func->makeVariable(Ty);
776 Reg->setRegNum(RegNum); 778 Reg->setRegNum(RegNum);
777 PhysicalRegisters[Ty][RegNum] = Reg; 779 PhysicalRegisters[Ty][RegNum] = Reg;
778 // Specially mark a named physical register as an "argument" so that it is 780 // Specially mark a named physical register as an "argument" so that it is
779 // considered live upon function entry. Otherwise it's possible to get 781 // considered live upon function entry. Otherwise it's possible to get
780 // liveness validation errors for saving callee-save registers. 782 // liveness validation errors for saving callee-save registers.
781 Func->addImplicitArg(Reg); 783 Func->addImplicitArg(Reg);
782 // Don't bother tracking the live range of a named physical register. 784 // Don't bother tracking the live range of a named physical register.
783 Reg->setIgnoreLiveness(); 785 Reg->setIgnoreLiveness();
784 } 786 }
785 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum)); 787 assert(Traits::getGprForType(Ty, RegNum) == static_cast<int32_t>(RegNum));
786 return Reg; 788 return Reg;
787 } 789 }
788 790
789 template <class Machine> 791 template <typename TraitsType>
790 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 792 IceString TargetX86Base<TraitsType>::getRegName(SizeT RegNum, Type Ty) const {
791 return Traits::getRegName(Traits::getGprForType(Ty, RegNum)); 793 return Traits::getRegName(Traits::getGprForType(Ty, RegNum));
792 } 794 }
793 795
794 template <class Machine> 796 template <typename TraitsType>
795 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 797 void TargetX86Base<TraitsType>::emitVariable(const Variable *Var) const {
796 if (!BuildDefs::dump()) 798 if (!BuildDefs::dump())
797 return; 799 return;
798 Ostream &Str = Ctx->getStrEmit(); 800 Ostream &Str = Ctx->getStrEmit();
799 if (Var->hasReg()) { 801 if (Var->hasReg()) {
800 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 802 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
801 return; 803 return;
802 } 804 }
803 if (Var->mustHaveReg()) { 805 if (Var->mustHaveReg()) {
804 llvm_unreachable("Infinite-weight Variable has no register assigned"); 806 llvm_unreachable("Infinite-weight Variable has no register assigned");
805 } 807 }
(...skipping 10 matching lines...) Expand all
816 if (DecorateAsm) { 818 if (DecorateAsm) {
817 Str << Var->getSymbolicStackOffset(Func); 819 Str << Var->getSymbolicStackOffset(Func);
818 } else { 820 } else {
819 Str << Offset; 821 Str << Offset;
820 } 822 }
821 } 823 }
822 const Type FrameSPTy = Traits::WordType; 824 const Type FrameSPTy = Traits::WordType;
823 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 825 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
824 } 826 }
825 827
826 template <class Machine> 828 template <typename TraitsType>
827 typename TargetX86Base<Machine>::Traits::Address 829 typename TargetX86Base<TraitsType>::X86Address
828 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 830 TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
829 if (Var->hasReg()) 831 if (Var->hasReg())
830 llvm_unreachable("Stack Variable has a register assigned"); 832 llvm_unreachable("Stack Variable has a register assigned");
831 if (Var->mustHaveReg()) { 833 if (Var->mustHaveReg()) {
832 llvm_unreachable("Infinite-weight Variable has no register assigned"); 834 llvm_unreachable("Infinite-weight Variable has no register assigned");
833 } 835 }
834 int32_t Offset = Var->getStackOffset(); 836 int32_t Offset = Var->getStackOffset();
835 int32_t BaseRegNum = Var->getBaseRegNum(); 837 int32_t BaseRegNum = Var->getBaseRegNum();
836 if (Var->getBaseRegNum() == Variable::NoRegister) 838 if (Var->getBaseRegNum() == Variable::NoRegister)
837 BaseRegNum = getFrameOrStackReg(); 839 BaseRegNum = getFrameOrStackReg();
838 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, 840 return X86Address(Traits::getEncodedGPR(BaseRegNum), Offset,
839 AssemblerFixup::NoFixup); 841 AssemblerFixup::NoFixup);
840 } 842 }
841 843
842 /// Helper function for addProlog(). 844 /// Helper function for addProlog().
843 /// 845 ///
844 /// This assumes Arg is an argument passed on the stack. This sets the frame 846 /// This assumes Arg is an argument passed on the stack. This sets the frame
845 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 847 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
846 /// I64 arg that has been split into Lo and Hi components, it calls itself 848 /// I64 arg that has been split into Lo and Hi components, it calls itself
847 /// recursively on the components, taking care to handle Lo first because of the 849 /// recursively on the components, taking care to handle Lo first because of the
848 /// little-endian architecture. Lastly, this function generates an instruction 850 /// little-endian architecture. Lastly, this function generates an instruction
849 /// to copy Arg into its assigned register if applicable. 851 /// to copy Arg into its assigned register if applicable.
850 template <class Machine> 852 template <typename TraitsType>
851 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 853 void TargetX86Base<TraitsType>::finishArgumentLowering(
852 Variable *FramePtr, 854 Variable *Arg, Variable *FramePtr, size_t BasicFrameOffset,
853 size_t BasicFrameOffset, 855 size_t StackAdjBytes, size_t &InArgsSizeBytes) {
854 size_t StackAdjBytes,
855 size_t &InArgsSizeBytes) {
856 if (!Traits::Is64Bit) { 856 if (!Traits::Is64Bit) {
857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { 857 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
858 Variable *Lo = Arg64On32->getLo(); 858 Variable *Lo = Arg64On32->getLo();
859 Variable *Hi = Arg64On32->getHi(); 859 Variable *Hi = Arg64On32->getHi();
860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes, 860 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, StackAdjBytes,
861 InArgsSizeBytes); 861 InArgsSizeBytes);
862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes, 862 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, StackAdjBytes,
863 InArgsSizeBytes); 863 InArgsSizeBytes);
864 return; 864 return;
865 } 865 }
866 } 866 }
867 Type Ty = Arg->getType(); 867 Type Ty = Arg->getType();
868 if (isVectorType(Ty)) { 868 if (isVectorType(Ty)) {
869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 869 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
870 } 870 }
871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 871 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 872 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
873 if (Arg->hasReg()) { 873 if (Arg->hasReg()) {
874 assert(Ty != IceType_i64 || Traits::Is64Bit); 874 assert(Ty != IceType_i64 || Traits::Is64Bit);
875 auto *Mem = Traits::X86OperandMem::create( 875 auto *Mem = X86OperandMem::create(
876 Func, Ty, FramePtr, 876 Func, Ty, FramePtr,
877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes)); 877 Ctx->getConstantInt32(Arg->getStackOffset() + StackAdjBytes));
878 if (isVectorType(Arg->getType())) { 878 if (isVectorType(Arg->getType())) {
879 _movp(Arg, Mem); 879 _movp(Arg, Mem);
880 } else { 880 } else {
881 _mov(Arg, Mem); 881 _mov(Arg, Mem);
882 } 882 }
883 // This argument-copying instruction uses an explicit Traits::X86OperandMem 883 // This argument-copying instruction uses an explicit X86OperandMem
884 // operand instead of a Variable, so its fill-from-stack operation has to 884 // operand instead of a Variable, so its fill-from-stack operation has to
885 // be tracked separately for statistics. 885 // be tracked separately for statistics.
886 Ctx->statsUpdateFills(); 886 Ctx->statsUpdateFills();
887 } 887 }
888 } 888 }
889 889
890 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 890 template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
891 return Traits::WordType; 891 return Traits::WordType;
892 } 892 }
893 893
894 template <class Machine> 894 template <typename TraitsType>
895 template <typename T> 895 template <typename T>
896 typename std::enable_if<!T::Is64Bit, Operand>::type * 896 typename std::enable_if<!T::Is64Bit, Operand>::type *
897 TargetX86Base<Machine>::loOperand(Operand *Operand) { 897 TargetX86Base<TraitsType>::loOperand(Operand *Operand) {
898 assert(Operand->getType() == IceType_i64 || 898 assert(Operand->getType() == IceType_i64 ||
899 Operand->getType() == IceType_f64); 899 Operand->getType() == IceType_f64);
900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
901 return Operand; 901 return Operand;
902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 902 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
903 return Var64On32->getLo(); 903 return Var64On32->getLo();
904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 904 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 905 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 906 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
907 // Check if we need to blind/pool the constant. 907 // Check if we need to blind/pool the constant.
908 return legalize(ConstInt); 908 return legalize(ConstInt);
909 } 909 }
910 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 910 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
911 auto *MemOperand = Traits::X86OperandMem::create( 911 auto *MemOperand = X86OperandMem::create(
912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 912 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
913 Mem->getShift(), Mem->getSegmentRegister()); 913 Mem->getShift(), Mem->getSegmentRegister());
914 // Test if we should randomize or pool the offset, if so randomize it or 914 // Test if we should randomize or pool the offset, if so randomize it or
915 // pool it then create mem operand with the blinded/pooled constant. 915 // pool it then create mem operand with the blinded/pooled constant.
916 // Otherwise, return the mem operand as ordinary mem operand. 916 // Otherwise, return the mem operand as ordinary mem operand.
917 return legalize(MemOperand); 917 return legalize(MemOperand);
918 } 918 }
919 llvm_unreachable("Unsupported operand type"); 919 llvm_unreachable("Unsupported operand type");
920 return nullptr; 920 return nullptr;
921 } 921 }
922 922
923 template <class Machine> 923 template <typename TraitsType>
924 template <typename T> 924 template <typename T>
925 typename std::enable_if<!T::Is64Bit, Operand>::type * 925 typename std::enable_if<!T::Is64Bit, Operand>::type *
926 TargetX86Base<Machine>::hiOperand(Operand *Operand) { 926 TargetX86Base<TraitsType>::hiOperand(Operand *Operand) {
927 assert(Operand->getType() == IceType_i64 || 927 assert(Operand->getType() == IceType_i64 ||
928 Operand->getType() == IceType_f64); 928 Operand->getType() == IceType_f64);
929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 929 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
930 return Operand; 930 return Operand;
931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 931 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
932 return Var64On32->getHi(); 932 return Var64On32->getHi();
933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 933 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 934 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); 935 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
936 // Check if we need to blind/pool the constant. 936 // Check if we need to blind/pool the constant.
937 return legalize(ConstInt); 937 return legalize(ConstInt);
938 } 938 }
939 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 939 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Operand)) {
940 Constant *Offset = Mem->getOffset(); 940 Constant *Offset = Mem->getOffset();
941 if (Offset == nullptr) { 941 if (Offset == nullptr) {
942 Offset = Ctx->getConstantInt32(4); 942 Offset = Ctx->getConstantInt32(4);
943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) { 943 } else if (auto *IntOffset = llvm::dyn_cast<ConstantInteger32>(Offset)) {
944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); 944 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) { 945 } else if (auto *SymOffset = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 946 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
947 Offset = 947 Offset =
948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), 948 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
949 SymOffset->getSuppressMangling()); 949 SymOffset->getSuppressMangling());
950 } 950 }
951 auto *MemOperand = Traits::X86OperandMem::create( 951 auto *MemOperand = X86OperandMem::create(
952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), 952 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
953 Mem->getShift(), Mem->getSegmentRegister()); 953 Mem->getShift(), Mem->getSegmentRegister());
954 // Test if the Offset is an eligible i32 constants for randomization and 954 // Test if the Offset is an eligible i32 constants for randomization and
955 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem 955 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
956 // operand. 956 // operand.
957 return legalize(MemOperand); 957 return legalize(MemOperand);
958 } 958 }
959 llvm_unreachable("Unsupported operand type"); 959 llvm_unreachable("Unsupported operand type");
960 return nullptr; 960 return nullptr;
961 } 961 }
962 962
963 template <class Machine> 963 template <typename TraitsType>
964 llvm::SmallBitVector 964 llvm::SmallBitVector
965 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 965 TargetX86Base<TraitsType>::getRegisterSet(RegSetMask Include,
966 RegSetMask Exclude) const { 966 RegSetMask Exclude) const {
967 return Traits::getRegisterSet(Include, Exclude); 967 return Traits::getRegisterSet(Include, Exclude);
968 } 968 }
969 969
970 template <class Machine> 970 template <typename TraitsType>
971 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 971 void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Inst) {
972 // Conservatively require the stack to be aligned. Some stack adjustment 972 // Conservatively require the stack to be aligned. Some stack adjustment
973 // operations implemented below assume that the stack is aligned before the 973 // operations implemented below assume that the stack is aligned before the
974 // alloca. All the alloca code ensures that the stack alignment is preserved 974 // alloca. All the alloca code ensures that the stack alignment is preserved
975 // after the alloca. The stack alignment restriction can be relaxed in some 975 // after the alloca. The stack alignment restriction can be relaxed in some
976 // cases. 976 // cases.
977 NeedsStackAlignment = true; 977 NeedsStackAlignment = true;
978 978
979 // For default align=0, set it to the real value 1, to avoid any 979 // For default align=0, set it to the real value 1, to avoid any
980 // bit-manipulation problems below. 980 // bit-manipulation problems below.
981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes()); 981 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
1027 _mov(T, TotalSize); 1027 _mov(T, TotalSize);
1028 } 1028 }
1029 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1029 _add(T, Ctx->getConstantInt32(Alignment - 1));
1030 _and(T, Ctx->getConstantInt32(-Alignment)); 1030 _and(T, Ctx->getConstantInt32(-Alignment));
1031 _sub(esp, T); 1031 _sub(esp, T);
1032 } 1032 }
1033 // Add enough to the returned address to account for the out args area. 1033 // Add enough to the returned address to account for the out args area.
1034 uint32_t OutArgsSize = maxOutArgsSizeBytes(); 1034 uint32_t OutArgsSize = maxOutArgsSizeBytes();
1035 if (OutArgsSize > 0) { 1035 if (OutArgsSize > 0) {
1036 Variable *T = makeReg(IceType_i32); 1036 Variable *T = makeReg(IceType_i32);
1037 typename Traits::X86OperandMem *CalculateOperand = 1037 auto *CalculateOperand = X86OperandMem::create(
1038 Traits::X86OperandMem::create( 1038 Func, IceType_i32, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
1039 Func, IceType_i32, esp,
1040 Ctx->getConstantInt(IceType_i32, OutArgsSize));
1041 _lea(T, CalculateOperand); 1039 _lea(T, CalculateOperand);
1042 _mov(Dest, T); 1040 _mov(Dest, T);
1043 } else { 1041 } else {
1044 _mov(Dest, esp); 1042 _mov(Dest, esp);
1045 } 1043 }
1046 } 1044 }
1047 1045
1048 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1046 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1049 /// narrower) for certain constants. The lea instruction can be used to multiply 1047 /// narrower) for certain constants. The lea instruction can be used to multiply
1050 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of 1048 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
1051 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1049 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1052 /// lea-based multiplies by 5, combined with left-shifting by 2. 1050 /// lea-based multiplies by 5, combined with left-shifting by 2.
1053 template <class Machine> 1051 template <typename TraitsType>
1054 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1052 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1055 int32_t Src1) { 1053 int32_t Src1) {
1056 // Disable this optimization for Om1 and O0, just to keep things simple 1054 // Disable this optimization for Om1 and O0, just to keep things simple
1057 // there. 1055 // there.
1058 if (Ctx->getFlags().getOptLevel() < Opt_1) 1056 if (Ctx->getFlags().getOptLevel() < Opt_1)
1059 return false; 1057 return false;
1060 Type Ty = Dest->getType(); 1058 Type Ty = Dest->getType();
1061 Variable *T = nullptr; 1059 Variable *T = nullptr;
1062 if (Src1 == -1) { 1060 if (Src1 == -1) {
1063 _mov(T, Src0); 1061 _mov(T, Src0);
1064 _neg(T); 1062 _neg(T);
1065 _mov(Dest, T); 1063 _mov(Dest, T);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
1112 return false; 1110 return false;
1113 // Limit the number of lea/shl operations for a single multiply, to a 1111 // Limit the number of lea/shl operations for a single multiply, to a
1114 // somewhat arbitrary choice of 3. 1112 // somewhat arbitrary choice of 3.
1115 constexpr uint32_t MaxOpsForOptimizedMul = 3; 1113 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1116 if (CountOps > MaxOpsForOptimizedMul) 1114 if (CountOps > MaxOpsForOptimizedMul)
1117 return false; 1115 return false;
1118 _mov(T, Src0); 1116 _mov(T, Src0);
1119 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1117 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1120 for (uint32_t i = 0; i < Count9; ++i) { 1118 for (uint32_t i = 0; i < Count9; ++i) {
1121 constexpr uint16_t Shift = 3; // log2(9-1) 1119 constexpr uint16_t Shift = 3; // log2(9-1)
1122 _lea(T, 1120 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1123 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1124 } 1121 }
1125 for (uint32_t i = 0; i < Count5; ++i) { 1122 for (uint32_t i = 0; i < Count5; ++i) {
1126 constexpr uint16_t Shift = 2; // log2(5-1) 1123 constexpr uint16_t Shift = 2; // log2(5-1)
1127 _lea(T, 1124 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1128 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1129 } 1125 }
1130 for (uint32_t i = 0; i < Count3; ++i) { 1126 for (uint32_t i = 0; i < Count3; ++i) {
1131 constexpr uint16_t Shift = 1; // log2(3-1) 1127 constexpr uint16_t Shift = 1; // log2(3-1)
1132 _lea(T, 1128 _lea(T, X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1133 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1134 } 1129 }
1135 if (Count2) { 1130 if (Count2) {
1136 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1131 _shl(T, Ctx->getConstantInt(Ty, Count2));
1137 } 1132 }
1138 if (Src1IsNegative) 1133 if (Src1IsNegative)
1139 _neg(T); 1134 _neg(T);
1140 _mov(Dest, T); 1135 _mov(Dest, T);
1141 return true; 1136 return true;
1142 } 1137 }
1143 1138
1144 template <class Machine> 1139 template <typename TraitsType>
1145 void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op, 1140 void TargetX86Base<TraitsType>::lowerShift64(InstArithmetic::OpKind Op,
1146 Operand *Src0Lo, Operand *Src0Hi, 1141 Operand *Src0Lo, Operand *Src0Hi,
1147 Operand *Src1Lo, Variable *DestLo, 1142 Operand *Src1Lo, Variable *DestLo,
1148 Variable *DestHi) { 1143 Variable *DestHi) {
1149 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. 1144 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1150 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1145 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1151 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1146 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1152 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1147 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1153 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { 1148 if (auto *ConstantShiftAmount = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1154 uint32_t ShiftAmount = ConstantShiftAmount->getValue(); 1149 uint32_t ShiftAmount = ConstantShiftAmount->getValue();
1155 if (ShiftAmount > 32) { 1150 if (ShiftAmount > 32) {
1156 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32); 1151 Constant *ReducedShift = Ctx->getConstantInt32(ShiftAmount - 32);
1157 switch (Op) { 1152 switch (Op) {
1158 default: 1153 default:
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
1267 } 1262 }
1268 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> 1263 // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
1269 // a.lo = t2 1264 // a.lo = t2
1270 // a.hi = t3 1265 // a.hi = t3
1271 _mov(DestLo, T_2); 1266 _mov(DestLo, T_2);
1272 _mov(DestHi, T_3); 1267 _mov(DestHi, T_3);
1273 } 1268 }
1274 } else { 1269 } else {
1275 // NON-CONSTANT CASES. 1270 // NON-CONSTANT CASES.
1276 Constant *BitTest = Ctx->getConstantInt32(0x20); 1271 Constant *BitTest = Ctx->getConstantInt32(0x20);
1277 typename Traits::Insts::Label *Label = 1272 InstX86Label *Label = InstX86Label::create(Func, this);
1278 Traits::Insts::Label::create(Func, this);
1279 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1273 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1280 // t1:ecx = c.lo & 0xff 1274 // t1:ecx = c.lo & 0xff
1281 // t2 = b.lo 1275 // t2 = b.lo
1282 // t3 = b.hi 1276 // t3 = b.hi
1283 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); 1277 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
1284 _mov(T_2, Src0Lo); 1278 _mov(T_2, Src0Lo);
1285 _mov(T_3, Src0Hi); 1279 _mov(T_3, Src0Hi);
1286 switch (Op) { 1280 switch (Op) {
1287 default: 1281 default:
1288 assert(0 && "non-shift op"); 1282 assert(0 && "non-shift op");
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
1348 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> 1342 // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
1349 // L1: 1343 // L1:
1350 // a.lo = t2 1344 // a.lo = t2
1351 // a.hi = t3 1345 // a.hi = t3
1352 Context.insert(Label); 1346 Context.insert(Label);
1353 _mov(DestLo, T_2); 1347 _mov(DestLo, T_2);
1354 _mov(DestHi, T_3); 1348 _mov(DestHi, T_3);
1355 } 1349 }
1356 } 1350 }
1357 1351
1358 template <class Machine> 1352 template <typename TraitsType>
1359 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1353 void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
1360 Variable *Dest = Inst->getDest(); 1354 Variable *Dest = Inst->getDest();
1361 if (Dest->isRematerializable()) { 1355 if (Dest->isRematerializable()) {
1362 Context.insert<InstFakeDef>(Dest); 1356 Context.insert<InstFakeDef>(Dest);
1363 return; 1357 return;
1364 } 1358 }
1365 Type Ty = Dest->getType(); 1359 Type Ty = Dest->getType();
1366 Operand *Src0 = legalize(Inst->getSrc(0)); 1360 Operand *Src0 = legalize(Inst->getSrc(0));
1367 Operand *Src1 = legalize(Inst->getSrc(1)); 1361 Operand *Src1 = legalize(Inst->getSrc(1));
1368 if (Inst->isCommutative()) { 1362 if (Inst->isCommutative()) {
1369 uint32_t SwapCount = 0; 1363 uint32_t SwapCount = 0;
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
1501 case InstArithmetic::Srem: 1495 case InstArithmetic::Srem:
1502 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1496 llvm_unreachable("Call-helper-involved instruction for i64 type \
1503 should have already been handled before"); 1497 should have already been handled before");
1504 break; 1498 break;
1505 } 1499 }
1506 return; 1500 return;
1507 } 1501 }
1508 if (isVectorType(Ty)) { 1502 if (isVectorType(Ty)) {
1509 // TODO: Trap on integer divide and integer modulo by zero. See: 1503 // TODO: Trap on integer divide and integer modulo by zero. See:
1510 // https://code.google.com/p/nativeclient/issues/detail?id=3899 1504 // https://code.google.com/p/nativeclient/issues/detail?id=3899
1511 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) 1505 if (llvm::isa<X86OperandMem>(Src1))
1512 Src1 = legalizeToReg(Src1); 1506 Src1 = legalizeToReg(Src1);
1513 switch (Inst->getOp()) { 1507 switch (Inst->getOp()) {
1514 case InstArithmetic::_num: 1508 case InstArithmetic::_num:
1515 llvm_unreachable("Unknown arithmetic operator"); 1509 llvm_unreachable("Unknown arithmetic operator");
1516 break; 1510 break;
1517 case InstArithmetic::Add: { 1511 case InstArithmetic::Add: {
1518 Variable *T = makeReg(Ty); 1512 Variable *T = makeReg(Ty);
1519 _movp(T, Src0); 1513 _movp(T, Src0);
1520 _padd(T, Src1); 1514 _padd(T, Src1);
1521 _movp(Dest, T); 1515 _movp(Dest, T);
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
1920 _mov(T, Src0); 1914 _mov(T, Src0);
1921 _divss(T, Src1); 1915 _divss(T, Src1);
1922 _mov(Dest, T); 1916 _mov(Dest, T);
1923 break; 1917 break;
1924 case InstArithmetic::Frem: 1918 case InstArithmetic::Frem:
1925 llvm::report_fatal_error("Helper call was expected"); 1919 llvm::report_fatal_error("Helper call was expected");
1926 break; 1920 break;
1927 } 1921 }
1928 } 1922 }
1929 1923
1930 template <class Machine> 1924 template <typename TraitsType>
1931 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1925 void TargetX86Base<TraitsType>::lowerAssign(const InstAssign *Inst) {
1932 Variable *Dest = Inst->getDest(); 1926 Variable *Dest = Inst->getDest();
1933 if (Dest->isRematerializable()) { 1927 if (Dest->isRematerializable()) {
1934 Context.insert<InstFakeDef>(Dest); 1928 Context.insert<InstFakeDef>(Dest);
1935 return; 1929 return;
1936 } 1930 }
1937 Operand *Src = Inst->getSrc(0); 1931 Operand *Src = Inst->getSrc(0);
1938 assert(Dest->getType() == Src->getType()); 1932 assert(Dest->getType() == Src->getType());
1939 lowerMove(Dest, Src, false); 1933 lowerMove(Dest, Src, false);
1940 } 1934 }
1941 1935
1942 template <class Machine> 1936 template <typename TraitsType>
1943 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { 1937 void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
1944 if (Br->isUnconditional()) { 1938 if (Br->isUnconditional()) {
1945 _br(Br->getTargetUnconditional()); 1939 _br(Br->getTargetUnconditional());
1946 return; 1940 return;
1947 } 1941 }
1948 Operand *Cond = Br->getCondition(); 1942 Operand *Cond = Br->getCondition();
1949 1943
1950 // Handle folding opportunities. 1944 // Handle folding opportunities.
1951 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 1945 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1952 assert(Producer->isDeleted()); 1946 assert(Producer->isDeleted());
1953 switch (BoolFolding::getProducerKind(Producer)) { 1947 switch (BoolFolding::getProducerKind(Producer)) {
(...skipping 13 matching lines...) Expand all
1967 return; 1961 return;
1968 } 1962 }
1969 } 1963 }
1970 } 1964 }
1971 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 1965 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1972 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1966 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1973 _cmp(Src0, Zero); 1967 _cmp(Src0, Zero);
1974 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 1968 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
1975 } 1969 }
1976 1970
1977 template <class Machine> 1971 template <typename TraitsType>
1978 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1972 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) {
1979 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1973 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1980 InstCast::OpKind CastKind = Inst->getCastKind(); 1974 InstCast::OpKind CastKind = Inst->getCastKind();
1981 Variable *Dest = Inst->getDest(); 1975 Variable *Dest = Inst->getDest();
1982 Type DestTy = Dest->getType(); 1976 Type DestTy = Dest->getType();
1983 switch (CastKind) { 1977 switch (CastKind) {
1984 default: 1978 default:
1985 Func->setError("Cast type not supported"); 1979 Func->setError("Cast type not supported");
1986 return; 1980 return;
1987 case InstCast::Sext: { 1981 case InstCast::Sext: {
1988 // Src0RM is the source operand legalized to physical register or memory, 1982 // Src0RM is the source operand legalized to physical register or memory,
(...skipping 157 matching lines...) Expand 10 before | Expand all | Expand 10 after
2146 Variable *T = makeReg(DestTy); 2140 Variable *T = makeReg(DestTy);
2147 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); 2141 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
2148 _mov(Dest, T); 2142 _mov(Dest, T);
2149 break; 2143 break;
2150 } 2144 }
2151 case InstCast::Fptosi: 2145 case InstCast::Fptosi:
2152 if (isVectorType(DestTy)) { 2146 if (isVectorType(DestTy)) {
2153 assert(DestTy == IceType_v4i32 && 2147 assert(DestTy == IceType_v4i32 &&
2154 Inst->getSrc(0)->getType() == IceType_v4f32); 2148 Inst->getSrc(0)->getType() == IceType_v4f32);
2155 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2149 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2156 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2150 if (llvm::isa<X86OperandMem>(Src0RM))
2157 Src0RM = legalizeToReg(Src0RM); 2151 Src0RM = legalizeToReg(Src0RM);
2158 Variable *T = makeReg(DestTy); 2152 Variable *T = makeReg(DestTy);
2159 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2153 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2160 _movp(Dest, T); 2154 _movp(Dest, T);
2161 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { 2155 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2162 llvm::report_fatal_error("Helper call was expected"); 2156 llvm::report_fatal_error("Helper call was expected");
2163 } else { 2157 } else {
2164 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2158 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2165 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2159 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2166 Variable *T_1 = nullptr; 2160 Variable *T_1 = nullptr;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
2212 if (DestTy == IceType_i1) 2206 if (DestTy == IceType_i1)
2213 _and(T_2, Ctx->getConstantInt1(1)); 2207 _and(T_2, Ctx->getConstantInt1(1));
2214 _mov(Dest, T_2); 2208 _mov(Dest, T_2);
2215 } 2209 }
2216 break; 2210 break;
2217 case InstCast::Sitofp: 2211 case InstCast::Sitofp:
2218 if (isVectorType(DestTy)) { 2212 if (isVectorType(DestTy)) {
2219 assert(DestTy == IceType_v4f32 && 2213 assert(DestTy == IceType_v4f32 &&
2220 Inst->getSrc(0)->getType() == IceType_v4i32); 2214 Inst->getSrc(0)->getType() == IceType_v4i32);
2221 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2215 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2222 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2216 if (llvm::isa<X86OperandMem>(Src0RM))
2223 Src0RM = legalizeToReg(Src0RM); 2217 Src0RM = legalizeToReg(Src0RM);
2224 Variable *T = makeReg(DestTy); 2218 Variable *T = makeReg(DestTy);
2225 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2219 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2226 _movp(Dest, T); 2220 _movp(Dest, T);
2227 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2221 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2228 llvm::report_fatal_error("Helper call was expected"); 2222 llvm::report_fatal_error("Helper call was expected");
2229 } else { 2223 } else {
2230 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2224 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2231 // Sign-extend the operand. 2225 // Sign-extend the operand.
2232 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2226 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
2297 Type SrcType = Src0RM->getType(); 2291 Type SrcType = Src0RM->getType();
2298 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || 2292 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2299 (DestTy == IceType_f32 && SrcType == IceType_i32)); 2293 (DestTy == IceType_f32 && SrcType == IceType_i32));
2300 // a.i32 = bitcast b.f32 ==> 2294 // a.i32 = bitcast b.f32 ==>
2301 // t.f32 = b.f32 2295 // t.f32 = b.f32
2302 // s.f32 = spill t.f32 2296 // s.f32 = spill t.f32
2303 // a.i32 = s.f32 2297 // a.i32 = s.f32
2304 Variable *T = nullptr; 2298 Variable *T = nullptr;
2305 // TODO: Should be able to force a spill setup by calling legalize() with 2299 // TODO: Should be able to force a spill setup by calling legalize() with
2306 // Legal_Mem and not Legal_Reg or Legal_Imm. 2300 // Legal_Mem and not Legal_Reg or Legal_Imm.
2307 typename Traits::SpillVariable *SpillVar = 2301 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
2308 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2309 SpillVar->setLinkedTo(Dest); 2302 SpillVar->setLinkedTo(Dest);
2310 Variable *Spill = SpillVar; 2303 Variable *Spill = SpillVar;
2311 Spill->setMustNotHaveReg(); 2304 Spill->setMustNotHaveReg();
2312 _mov(T, Src0RM); 2305 _mov(T, Src0RM);
2313 _mov(Spill, T); 2306 _mov(Spill, T);
2314 _mov(Dest, Spill); 2307 _mov(Dest, Spill);
2315 } break; 2308 } break;
2316 case IceType_i64: { 2309 case IceType_i64: {
2317 assert(Src0->getType() == IceType_f64); 2310 assert(Src0->getType() == IceType_f64);
2318 if (Traits::Is64Bit) { 2311 if (Traits::Is64Bit) {
2319 Variable *Src0R = legalizeToReg(Src0); 2312 Variable *Src0R = legalizeToReg(Src0);
2320 Variable *T = makeReg(IceType_i64); 2313 Variable *T = makeReg(IceType_i64);
2321 _movd(T, Src0R); 2314 _movd(T, Src0R);
2322 _mov(Dest, T); 2315 _mov(Dest, T);
2323 } else { 2316 } else {
2324 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2317 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2325 // a.i64 = bitcast b.f64 ==> 2318 // a.i64 = bitcast b.f64 ==>
2326 // s.f64 = spill b.f64 2319 // s.f64 = spill b.f64
2327 // t_lo.i32 = lo(s.f64) 2320 // t_lo.i32 = lo(s.f64)
2328 // a_lo.i32 = t_lo.i32 2321 // a_lo.i32 = t_lo.i32
2329 // t_hi.i32 = hi(s.f64) 2322 // t_hi.i32 = hi(s.f64)
2330 // a_hi.i32 = t_hi.i32 2323 // a_hi.i32 = t_hi.i32
2331 Operand *SpillLo, *SpillHi; 2324 Operand *SpillLo, *SpillHi;
2332 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2325 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2333 typename Traits::SpillVariable *SpillVar = 2326 SpillVariable *SpillVar =
2334 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2327 Func->makeVariable<SpillVariable>(IceType_f64);
2335 SpillVar->setLinkedTo(Src0Var); 2328 SpillVar->setLinkedTo(Src0Var);
2336 Variable *Spill = SpillVar; 2329 Variable *Spill = SpillVar;
2337 Spill->setMustNotHaveReg(); 2330 Spill->setMustNotHaveReg();
2338 _movq(Spill, Src0RM); 2331 _movq(Spill, Src0RM);
2339 SpillLo = Traits::VariableSplit::create(Func, Spill, 2332 SpillLo = Traits::VariableSplit::create(Func, Spill,
2340 Traits::VariableSplit::Low); 2333 Traits::VariableSplit::Low);
2341 SpillHi = Traits::VariableSplit::create(Func, Spill, 2334 SpillHi = Traits::VariableSplit::create(Func, Spill,
2342 Traits::VariableSplit::High); 2335 Traits::VariableSplit::High);
2343 } else { 2336 } else {
2344 SpillLo = loOperand(Src0RM); 2337 SpillLo = loOperand(Src0RM);
(...skipping 13 matching lines...) Expand all
2358 } break; 2351 } break;
2359 case IceType_f64: { 2352 case IceType_f64: {
2360 assert(Src0->getType() == IceType_i64); 2353 assert(Src0->getType() == IceType_i64);
2361 if (Traits::Is64Bit) { 2354 if (Traits::Is64Bit) {
2362 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2355 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2363 Variable *T = makeReg(IceType_f64); 2356 Variable *T = makeReg(IceType_f64);
2364 _movd(T, Src0RM); 2357 _movd(T, Src0RM);
2365 _mov(Dest, T); 2358 _mov(Dest, T);
2366 } else { 2359 } else {
2367 Src0 = legalize(Src0); 2360 Src0 = legalize(Src0);
2368 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { 2361 if (llvm::isa<X86OperandMem>(Src0)) {
2369 Variable *T = Func->makeVariable(DestTy); 2362 Variable *T = Func->makeVariable(DestTy);
2370 _movq(T, Src0); 2363 _movq(T, Src0);
2371 _movq(Dest, T); 2364 _movq(Dest, T);
2372 break; 2365 break;
2373 } 2366 }
2374 // a.f64 = bitcast b.i64 ==> 2367 // a.f64 = bitcast b.i64 ==>
2375 // t_lo.i32 = b_lo.i32 2368 // t_lo.i32 = b_lo.i32
2376 // FakeDef(s.f64) 2369 // FakeDef(s.f64)
2377 // lo(s.f64) = t_lo.i32 2370 // lo(s.f64) = t_lo.i32
2378 // t_hi.i32 = b_hi.i32 2371 // t_hi.i32 = b_hi.i32
2379 // hi(s.f64) = t_hi.i32 2372 // hi(s.f64) = t_hi.i32
2380 // a.f64 = s.f64 2373 // a.f64 = s.f64
2381 typename Traits::SpillVariable *SpillVar = 2374 SpillVariable *SpillVar =
2382 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2375 Func->makeVariable<SpillVariable>(IceType_f64);
2383 SpillVar->setLinkedTo(Dest); 2376 SpillVar->setLinkedTo(Dest);
2384 Variable *Spill = SpillVar; 2377 Variable *Spill = SpillVar;
2385 Spill->setMustNotHaveReg(); 2378 Spill->setMustNotHaveReg();
2386 2379
2387 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2380 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2388 auto *SpillLo = Traits::VariableSplit::create( 2381 auto *SpillLo = Traits::VariableSplit::create(
2389 Func, Spill, Traits::VariableSplit::Low); 2382 Func, Spill, Traits::VariableSplit::Low);
2390 auto *SpillHi = Traits::VariableSplit::create( 2383 auto *SpillHi = Traits::VariableSplit::create(
2391 Func, Spill, Traits::VariableSplit::High); 2384 Func, Spill, Traits::VariableSplit::High);
2392 _mov(T_Lo, loOperand(Src0)); 2385 _mov(T_Lo, loOperand(Src0));
(...skipping 18 matching lines...) Expand all
2411 case IceType_v4i32: 2404 case IceType_v4i32:
2412 case IceType_v4f32: { 2405 case IceType_v4f32: {
2413 _movp(Dest, legalizeToReg(Src0)); 2406 _movp(Dest, legalizeToReg(Src0));
2414 } break; 2407 } break;
2415 } 2408 }
2416 break; 2409 break;
2417 } 2410 }
2418 } 2411 }
2419 } 2412 }
2420 2413
2421 template <class Machine> 2414 template <typename TraitsType>
2422 void TargetX86Base<Machine>::lowerExtractElement( 2415 void TargetX86Base<TraitsType>::lowerExtractElement(
2423 const InstExtractElement *Inst) { 2416 const InstExtractElement *Inst) {
2424 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2417 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2425 ConstantInteger32 *ElementIndex = 2418 ConstantInteger32 *ElementIndex =
2426 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2419 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2427 // Only constant indices are allowed in PNaCl IR. 2420 // Only constant indices are allowed in PNaCl IR.
2428 assert(ElementIndex); 2421 assert(ElementIndex);
2429 2422
2430 unsigned Index = ElementIndex->getValue(); 2423 unsigned Index = ElementIndex->getValue();
2431 Type Ty = SourceVectNotLegalized->getType(); 2424 Type Ty = SourceVectNotLegalized->getType();
2432 Type ElementTy = typeElementType(Ty); 2425 Type ElementTy = typeElementType(Ty);
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2474 // Spill the value to a stack slot and do the extraction in memory. 2467 // Spill the value to a stack slot and do the extraction in memory.
2475 // 2468 //
2476 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support 2469 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
2477 // for legalizing to mem is implemented. 2470 // for legalizing to mem is implemented.
2478 Variable *Slot = Func->makeVariable(Ty); 2471 Variable *Slot = Func->makeVariable(Ty);
2479 Slot->setMustNotHaveReg(); 2472 Slot->setMustNotHaveReg();
2480 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); 2473 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
2481 2474
2482 // Compute the location of the element in memory. 2475 // Compute the location of the element in memory.
2483 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 2476 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2484 typename Traits::X86OperandMem *Loc = 2477 X86OperandMem *Loc =
2485 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 2478 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2486 _mov(ExtractedElementR, Loc); 2479 _mov(ExtractedElementR, Loc);
2487 } 2480 }
2488 2481
2489 if (ElementTy == IceType_i1) { 2482 if (ElementTy == IceType_i1) {
2490 // Truncate extracted integers to i1s if necessary. 2483 // Truncate extracted integers to i1s if necessary.
2491 Variable *T = makeReg(IceType_i1); 2484 Variable *T = makeReg(IceType_i1);
2492 InstCast *Cast = 2485 InstCast *Cast =
2493 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); 2486 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
2494 lowerCast(Cast); 2487 lowerCast(Cast);
2495 ExtractedElementR = T; 2488 ExtractedElementR = T;
2496 } 2489 }
2497 2490
2498 // Copy the element to the destination. 2491 // Copy the element to the destination.
2499 Variable *Dest = Inst->getDest(); 2492 Variable *Dest = Inst->getDest();
2500 _mov(Dest, ExtractedElementR); 2493 _mov(Dest, ExtractedElementR);
2501 } 2494 }
2502 2495
2503 template <class Machine> 2496 template <typename TraitsType>
2504 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) { 2497 void TargetX86Base<TraitsType>::lowerFcmp(const InstFcmp *Fcmp) {
2505 Variable *Dest = Fcmp->getDest(); 2498 Variable *Dest = Fcmp->getDest();
2506 2499
2507 if (isVectorType(Dest->getType())) { 2500 if (isVectorType(Dest->getType())) {
2508 lowerFcmpVector(Fcmp); 2501 lowerFcmpVector(Fcmp);
2509 } else { 2502 } else {
2510 constexpr Inst *Consumer = nullptr; 2503 constexpr Inst *Consumer = nullptr;
2511 lowerFcmpAndConsumer(Fcmp, Consumer); 2504 lowerFcmpAndConsumer(Fcmp, Consumer);
2512 } 2505 }
2513 } 2506 }
2514 2507
2515 template <class Machine> 2508 template <typename TraitsType>
2516 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, 2509 void TargetX86Base<TraitsType>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
2517 const Inst *Consumer) { 2510 const Inst *Consumer) {
2518 Operand *Src0 = Fcmp->getSrc(0); 2511 Operand *Src0 = Fcmp->getSrc(0);
2519 Operand *Src1 = Fcmp->getSrc(1); 2512 Operand *Src1 = Fcmp->getSrc(1);
2520 Variable *Dest = Fcmp->getDest(); 2513 Variable *Dest = Fcmp->getDest();
2521 2514
2522 if (isVectorType(Dest->getType())) 2515 if (isVectorType(Dest->getType()))
2523 llvm::report_fatal_error("Vector compare/branch cannot be folded"); 2516 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2524 2517
2525 if (Consumer != nullptr) { 2518 if (Consumer != nullptr) {
2526 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { 2519 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2527 if (lowerOptimizeFcmpSelect(Fcmp, Select)) 2520 if (lowerOptimizeFcmpSelect(Fcmp, Select))
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
2559 assert(Traits::TableFcmp[Index].Default); 2552 assert(Traits::TableFcmp[Index].Default);
2560 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); 2553 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer);
2561 return; 2554 return;
2562 } 2555 }
2563 } 2556 }
2564 int32_t IntDefault = Traits::TableFcmp[Index].Default; 2557 int32_t IntDefault = Traits::TableFcmp[Index].Default;
2565 if (Consumer == nullptr) { 2558 if (Consumer == nullptr) {
2566 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); 2559 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
2567 _mov(Dest, Default); 2560 _mov(Dest, Default);
2568 if (HasC1) { 2561 if (HasC1) {
2569 typename Traits::Insts::Label *Label = 2562 InstX86Label *Label = InstX86Label::create(Func, this);
2570 Traits::Insts::Label::create(Func, this);
2571 _br(Traits::TableFcmp[Index].C1, Label); 2563 _br(Traits::TableFcmp[Index].C1, Label);
2572 if (HasC2) { 2564 if (HasC2) {
2573 _br(Traits::TableFcmp[Index].C2, Label); 2565 _br(Traits::TableFcmp[Index].C2, Label);
2574 } 2566 }
2575 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); 2567 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
2576 _redefined(_mov(Dest, NonDefault)); 2568 _redefined(_mov(Dest, NonDefault));
2577 Context.insert(Label); 2569 Context.insert(Label);
2578 } 2570 }
2579 return; 2571 return;
2580 } 2572 }
(...skipping 14 matching lines...) Expand all
2595 return; 2587 return;
2596 } 2588 }
2597 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { 2589 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2598 Operand *SrcT = Select->getTrueOperand(); 2590 Operand *SrcT = Select->getTrueOperand();
2599 Operand *SrcF = Select->getFalseOperand(); 2591 Operand *SrcF = Select->getFalseOperand();
2600 Variable *SelectDest = Select->getDest(); 2592 Variable *SelectDest = Select->getDest();
2601 if (IntDefault != 0) 2593 if (IntDefault != 0)
2602 std::swap(SrcT, SrcF); 2594 std::swap(SrcT, SrcF);
2603 lowerMove(SelectDest, SrcF, false); 2595 lowerMove(SelectDest, SrcF, false);
2604 if (HasC1) { 2596 if (HasC1) {
2605 typename Traits::Insts::Label *Label = 2597 InstX86Label *Label = InstX86Label::create(Func, this);
2606 Traits::Insts::Label::create(Func, this);
2607 _br(Traits::TableFcmp[Index].C1, Label); 2598 _br(Traits::TableFcmp[Index].C1, Label);
2608 if (HasC2) { 2599 if (HasC2) {
2609 _br(Traits::TableFcmp[Index].C2, Label); 2600 _br(Traits::TableFcmp[Index].C2, Label);
2610 } 2601 }
2611 static constexpr bool IsRedefinition = true; 2602 static constexpr bool IsRedefinition = true;
2612 lowerMove(SelectDest, SrcT, IsRedefinition); 2603 lowerMove(SelectDest, SrcT, IsRedefinition);
2613 Context.insert(Label); 2604 Context.insert(Label);
2614 } 2605 }
2615 return; 2606 return;
2616 } 2607 }
2617 llvm::report_fatal_error("Unexpected consumer type"); 2608 llvm::report_fatal_error("Unexpected consumer type");
2618 } 2609 }
2619 2610
2620 template <class Machine> 2611 template <typename TraitsType>
2621 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) { 2612 void TargetX86Base<TraitsType>::lowerFcmpVector(const InstFcmp *Fcmp) {
2622 Operand *Src0 = Fcmp->getSrc(0); 2613 Operand *Src0 = Fcmp->getSrc(0);
2623 Operand *Src1 = Fcmp->getSrc(1); 2614 Operand *Src1 = Fcmp->getSrc(1);
2624 Variable *Dest = Fcmp->getDest(); 2615 Variable *Dest = Fcmp->getDest();
2625 2616
2626 if (!isVectorType(Dest->getType())) 2617 if (!isVectorType(Dest->getType()))
2627 llvm::report_fatal_error("Expected vector compare"); 2618 llvm::report_fatal_error("Expected vector compare");
2628 2619
2629 InstFcmp::FCond Condition = Fcmp->getCondition(); 2620 InstFcmp::FCond Condition = Fcmp->getCondition();
2630 size_t Index = static_cast<size_t>(Condition); 2621 size_t Index = static_cast<size_t>(Condition);
2631 assert(Index < Traits::TableFcmpSize); 2622 assert(Index < Traits::TableFcmpSize);
2632 2623
2633 if (Traits::TableFcmp[Index].SwapVectorOperands) 2624 if (Traits::TableFcmp[Index].SwapVectorOperands)
2634 std::swap(Src0, Src1); 2625 std::swap(Src0, Src1);
2635 2626
2636 Variable *T = nullptr; 2627 Variable *T = nullptr;
2637 2628
2638 if (Condition == InstFcmp::True) { 2629 if (Condition == InstFcmp::True) {
2639 // makeVectorOfOnes() requires an integer vector type. 2630 // makeVectorOfOnes() requires an integer vector type.
2640 T = makeVectorOfMinusOnes(IceType_v4i32); 2631 T = makeVectorOfMinusOnes(IceType_v4i32);
2641 } else if (Condition == InstFcmp::False) { 2632 } else if (Condition == InstFcmp::False) {
2642 T = makeVectorOfZeros(Dest->getType()); 2633 T = makeVectorOfZeros(Dest->getType());
2643 } else { 2634 } else {
2644 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2635 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2645 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2636 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2646 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) 2637 if (llvm::isa<X86OperandMem>(Src1RM))
2647 Src1RM = legalizeToReg(Src1RM); 2638 Src1RM = legalizeToReg(Src1RM);
2648 2639
2649 switch (Condition) { 2640 switch (Condition) {
2650 default: { 2641 default: {
2651 typename Traits::Cond::CmppsCond Predicate = 2642 CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
2652 Traits::TableFcmp[Index].Predicate;
2653 assert(Predicate != Traits::Cond::Cmpps_Invalid); 2643 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2654 T = makeReg(Src0RM->getType()); 2644 T = makeReg(Src0RM->getType());
2655 _movp(T, Src0RM); 2645 _movp(T, Src0RM);
2656 _cmpps(T, Src1RM, Predicate); 2646 _cmpps(T, Src1RM, Predicate);
2657 } break; 2647 } break;
2658 case InstFcmp::One: { 2648 case InstFcmp::One: {
2659 // Check both unequal and ordered. 2649 // Check both unequal and ordered.
2660 T = makeReg(Src0RM->getType()); 2650 T = makeReg(Src0RM->getType());
2661 Variable *T2 = makeReg(Src0RM->getType()); 2651 Variable *T2 = makeReg(Src0RM->getType());
2662 _movp(T, Src0RM); 2652 _movp(T, Src0RM);
(...skipping 21 matching lines...) Expand all
2684 } 2674 }
2685 2675
2686 inline bool isZero(const Operand *Opnd) { 2676 inline bool isZero(const Operand *Opnd) {
2687 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) 2677 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
2688 return C64->getValue() == 0; 2678 return C64->getValue() == 0;
2689 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) 2679 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
2690 return C32->getValue() == 0; 2680 return C32->getValue() == 0;
2691 return false; 2681 return false;
2692 } 2682 }
2693 2683
2694 template <class Machine> 2684 template <typename TraitsType>
2695 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp, 2685 void TargetX86Base<TraitsType>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
2696 const Inst *Consumer) { 2686 const Inst *Consumer) {
2697 Operand *Src0 = legalize(Icmp->getSrc(0)); 2687 Operand *Src0 = legalize(Icmp->getSrc(0));
2698 Operand *Src1 = legalize(Icmp->getSrc(1)); 2688 Operand *Src1 = legalize(Icmp->getSrc(1));
2699 Variable *Dest = Icmp->getDest(); 2689 Variable *Dest = Icmp->getDest();
2700 2690
2701 if (isVectorType(Dest->getType())) 2691 if (isVectorType(Dest->getType()))
2702 llvm::report_fatal_error("Vector compare/branch cannot be folded"); 2692 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2703 2693
2704 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { 2694 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2705 lowerIcmp64(Icmp, Consumer); 2695 lowerIcmp64(Icmp, Consumer);
2706 return; 2696 return;
(...skipping 11 matching lines...) Expand all
2718 movOrConsumer(false, Dest, Consumer); 2708 movOrConsumer(false, Dest, Consumer);
2719 return; 2709 return;
2720 } 2710 }
2721 } 2711 }
2722 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2712 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2723 _cmp(Src0RM, Src1); 2713 _cmp(Src0RM, Src1);
2724 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, 2714 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
2725 Consumer); 2715 Consumer);
2726 } 2716 }
2727 2717
2728 template <class Machine> 2718 template <typename TraitsType>
2729 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) { 2719 void TargetX86Base<TraitsType>::lowerIcmpVector(const InstIcmp *Icmp) {
2730 Operand *Src0 = legalize(Icmp->getSrc(0)); 2720 Operand *Src0 = legalize(Icmp->getSrc(0));
2731 Operand *Src1 = legalize(Icmp->getSrc(1)); 2721 Operand *Src1 = legalize(Icmp->getSrc(1));
2732 Variable *Dest = Icmp->getDest(); 2722 Variable *Dest = Icmp->getDest();
2733 2723
2734 if (!isVectorType(Dest->getType())) 2724 if (!isVectorType(Dest->getType()))
2735 llvm::report_fatal_error("Expected a vector compare"); 2725 llvm::report_fatal_error("Expected a vector compare");
2736 2726
2737 Type Ty = Src0->getType(); 2727 Type Ty = Src0->getType();
2738 // Promote i1 vectors to 128 bit integer vector types. 2728 // Promote i1 vectors to 128 bit integer vector types.
2739 if (typeElementType(Ty) == IceType_i1) { 2729 if (typeElementType(Ty) == IceType_i1) {
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2781 Src0RM = T0; 2771 Src0RM = T0;
2782 Src1RM = T1; 2772 Src1RM = T1;
2783 } 2773 }
2784 2774
2785 Variable *T = makeReg(Ty); 2775 Variable *T = makeReg(Ty);
2786 switch (Condition) { 2776 switch (Condition) {
2787 default: 2777 default:
2788 llvm_unreachable("unexpected condition"); 2778 llvm_unreachable("unexpected condition");
2789 break; 2779 break;
2790 case InstIcmp::Eq: { 2780 case InstIcmp::Eq: {
2791 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) 2781 if (llvm::isa<X86OperandMem>(Src1RM))
2792 Src1RM = legalizeToReg(Src1RM); 2782 Src1RM = legalizeToReg(Src1RM);
2793 _movp(T, Src0RM); 2783 _movp(T, Src0RM);
2794 _pcmpeq(T, Src1RM); 2784 _pcmpeq(T, Src1RM);
2795 } break; 2785 } break;
2796 case InstIcmp::Ne: { 2786 case InstIcmp::Ne: {
2797 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) 2787 if (llvm::isa<X86OperandMem>(Src1RM))
2798 Src1RM = legalizeToReg(Src1RM); 2788 Src1RM = legalizeToReg(Src1RM);
2799 _movp(T, Src0RM); 2789 _movp(T, Src0RM);
2800 _pcmpeq(T, Src1RM); 2790 _pcmpeq(T, Src1RM);
2801 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2791 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2802 _pxor(T, MinusOne); 2792 _pxor(T, MinusOne);
2803 } break; 2793 } break;
2804 case InstIcmp::Ugt: 2794 case InstIcmp::Ugt:
2805 case InstIcmp::Sgt: { 2795 case InstIcmp::Sgt: {
2806 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) 2796 if (llvm::isa<X86OperandMem>(Src1RM))
2807 Src1RM = legalizeToReg(Src1RM); 2797 Src1RM = legalizeToReg(Src1RM);
2808 _movp(T, Src0RM); 2798 _movp(T, Src0RM);
2809 _pcmpgt(T, Src1RM); 2799 _pcmpgt(T, Src1RM);
2810 } break; 2800 } break;
2811 case InstIcmp::Uge: 2801 case InstIcmp::Uge:
2812 case InstIcmp::Sge: { 2802 case InstIcmp::Sge: {
2813 // !(Src1RM > Src0RM) 2803 // !(Src1RM > Src0RM)
2814 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2804 if (llvm::isa<X86OperandMem>(Src0RM))
2815 Src0RM = legalizeToReg(Src0RM); 2805 Src0RM = legalizeToReg(Src0RM);
2816 _movp(T, Src1RM); 2806 _movp(T, Src1RM);
2817 _pcmpgt(T, Src0RM); 2807 _pcmpgt(T, Src0RM);
2818 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2808 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2819 _pxor(T, MinusOne); 2809 _pxor(T, MinusOne);
2820 } break; 2810 } break;
2821 case InstIcmp::Ult: 2811 case InstIcmp::Ult:
2822 case InstIcmp::Slt: { 2812 case InstIcmp::Slt: {
2823 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2813 if (llvm::isa<X86OperandMem>(Src0RM))
2824 Src0RM = legalizeToReg(Src0RM); 2814 Src0RM = legalizeToReg(Src0RM);
2825 _movp(T, Src1RM); 2815 _movp(T, Src1RM);
2826 _pcmpgt(T, Src0RM); 2816 _pcmpgt(T, Src0RM);
2827 } break; 2817 } break;
2828 case InstIcmp::Ule: 2818 case InstIcmp::Ule:
2829 case InstIcmp::Sle: { 2819 case InstIcmp::Sle: {
2830 // !(Src0RM > Src1RM) 2820 // !(Src0RM > Src1RM)
2831 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) 2821 if (llvm::isa<X86OperandMem>(Src1RM))
2832 Src1RM = legalizeToReg(Src1RM); 2822 Src1RM = legalizeToReg(Src1RM);
2833 _movp(T, Src0RM); 2823 _movp(T, Src0RM);
2834 _pcmpgt(T, Src1RM); 2824 _pcmpgt(T, Src1RM);
2835 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2825 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2836 _pxor(T, MinusOne); 2826 _pxor(T, MinusOne);
2837 } break; 2827 } break;
2838 } 2828 }
2839 2829
2840 _movp(Dest, T); 2830 _movp(Dest, T);
2841 eliminateNextVectorSextInstruction(Dest); 2831 eliminateNextVectorSextInstruction(Dest);
2842 } 2832 }
2843 2833
2844 template <typename Machine> 2834 template <typename TraitsType>
2845 template <typename T> 2835 template <typename T>
2846 typename std::enable_if<!T::Is64Bit, void>::type 2836 typename std::enable_if<!T::Is64Bit, void>::type
2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, 2837 TargetX86Base<TraitsType>::lowerIcmp64(const InstIcmp *Icmp,
2848 const Inst *Consumer) { 2838 const Inst *Consumer) {
2849 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2839 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2850 Operand *Src0 = legalize(Icmp->getSrc(0)); 2840 Operand *Src0 = legalize(Icmp->getSrc(0));
2851 Operand *Src1 = legalize(Icmp->getSrc(1)); 2841 Operand *Src1 = legalize(Icmp->getSrc(1));
2852 Variable *Dest = Icmp->getDest(); 2842 Variable *Dest = Icmp->getDest();
2853 InstIcmp::ICond Condition = Icmp->getCondition(); 2843 InstIcmp::ICond Condition = Icmp->getCondition();
2854 size_t Index = static_cast<size_t>(Condition); 2844 size_t Index = static_cast<size_t>(Condition);
2855 assert(Index < Traits::TableIcmp64Size); 2845 assert(Index < Traits::TableIcmp64Size);
2856 Operand *Src0LoRM = nullptr; 2846 Operand *Src0LoRM = nullptr;
2857 Operand *Src0HiRM = nullptr; 2847 Operand *Src0HiRM = nullptr;
2858 // Legalize the portions of Src0 that are going to be needed. 2848 // Legalize the portions of Src0 that are going to be needed.
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
2933 case InstIcmp::Sle: 2923 case InstIcmp::Sle:
2934 break; 2924 break;
2935 } 2925 }
2936 } 2926 }
2937 // Handle general compares. 2927 // Handle general compares.
2938 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2928 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2939 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2929 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2940 if (Consumer == nullptr) { 2930 if (Consumer == nullptr) {
2941 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); 2931 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
2942 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); 2932 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
2943 typename Traits::Insts::Label *LabelFalse = 2933 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
2944 Traits::Insts::Label::create(Func, this); 2934 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
2945 typename Traits::Insts::Label *LabelTrue =
2946 Traits::Insts::Label::create(Func, this);
2947 _mov(Dest, One); 2935 _mov(Dest, One);
2948 _cmp(Src0HiRM, Src1HiRI); 2936 _cmp(Src0HiRM, Src1HiRI);
2949 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2937 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2950 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 2938 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2951 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2939 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2952 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 2940 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2953 _cmp(Src0LoRM, Src1LoRI); 2941 _cmp(Src0LoRM, Src1LoRI);
2954 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 2942 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2955 Context.insert(LabelFalse); 2943 Context.insert(LabelFalse);
2956 _redefined(_mov(Dest, Zero)); 2944 _redefined(_mov(Dest, Zero));
2957 Context.insert(LabelTrue); 2945 Context.insert(LabelTrue);
2958 return; 2946 return;
2959 } 2947 }
2960 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { 2948 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2961 _cmp(Src0HiRM, Src1HiRI); 2949 _cmp(Src0HiRM, Src1HiRI);
2962 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2950 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2963 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); 2951 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue());
2964 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2952 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2965 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); 2953 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse());
2966 _cmp(Src0LoRM, Src1LoRI); 2954 _cmp(Src0LoRM, Src1LoRI);
2967 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), 2955 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(),
2968 Br->getTargetFalse()); 2956 Br->getTargetFalse());
2969 return; 2957 return;
2970 } 2958 }
2971 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { 2959 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2972 Operand *SrcT = Select->getTrueOperand(); 2960 Operand *SrcT = Select->getTrueOperand();
2973 Operand *SrcF = Select->getFalseOperand(); 2961 Operand *SrcF = Select->getFalseOperand();
2974 Variable *SelectDest = Select->getDest(); 2962 Variable *SelectDest = Select->getDest();
2975 typename Traits::Insts::Label *LabelFalse = 2963 InstX86Label *LabelFalse = InstX86Label::create(Func, this);
2976 Traits::Insts::Label::create(Func, this); 2964 InstX86Label *LabelTrue = InstX86Label::create(Func, this);
2977 typename Traits::Insts::Label *LabelTrue =
2978 Traits::Insts::Label::create(Func, this);
2979 lowerMove(SelectDest, SrcT, false); 2965 lowerMove(SelectDest, SrcT, false);
2980 _cmp(Src0HiRM, Src1HiRI); 2966 _cmp(Src0HiRM, Src1HiRI);
2981 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2967 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2982 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 2968 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2983 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2969 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2984 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 2970 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2985 _cmp(Src0LoRM, Src1LoRI); 2971 _cmp(Src0LoRM, Src1LoRI);
2986 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 2972 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2987 Context.insert(LabelFalse); 2973 Context.insert(LabelFalse);
2988 static constexpr bool IsRedefinition = true; 2974 static constexpr bool IsRedefinition = true;
2989 lowerMove(SelectDest, SrcF, IsRedefinition); 2975 lowerMove(SelectDest, SrcF, IsRedefinition);
2990 Context.insert(LabelTrue); 2976 Context.insert(LabelTrue);
2991 return; 2977 return;
2992 } 2978 }
2993 llvm::report_fatal_error("Unexpected consumer type"); 2979 llvm::report_fatal_error("Unexpected consumer type");
2994 } 2980 }
2995 2981
2996 template <class Machine> 2982 template <typename TraitsType>
2997 void TargetX86Base<Machine>::setccOrConsumer( 2983 void TargetX86Base<TraitsType>::setccOrConsumer(BrCond Condition,
2998 typename Traits::Cond::BrCond Condition, Variable *Dest, 2984 Variable *Dest,
2999 const Inst *Consumer) { 2985 const Inst *Consumer) {
3000 if (Consumer == nullptr) { 2986 if (Consumer == nullptr) {
3001 _setcc(Dest, Condition); 2987 _setcc(Dest, Condition);
3002 return; 2988 return;
3003 } 2989 }
3004 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { 2990 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3005 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); 2991 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
3006 return; 2992 return;
3007 } 2993 }
3008 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { 2994 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3009 Operand *SrcT = Select->getTrueOperand(); 2995 Operand *SrcT = Select->getTrueOperand();
3010 Operand *SrcF = Select->getFalseOperand(); 2996 Operand *SrcF = Select->getFalseOperand();
3011 Variable *SelectDest = Select->getDest(); 2997 Variable *SelectDest = Select->getDest();
3012 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); 2998 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
3013 return; 2999 return;
3014 } 3000 }
3015 llvm::report_fatal_error("Unexpected consumer type"); 3001 llvm::report_fatal_error("Unexpected consumer type");
3016 } 3002 }
3017 3003
3018 template <class Machine> 3004 template <typename TraitsType>
3019 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest, 3005 void TargetX86Base<TraitsType>::movOrConsumer(bool IcmpResult, Variable *Dest,
3020 const Inst *Consumer) { 3006 const Inst *Consumer) {
3021 if (Consumer == nullptr) { 3007 if (Consumer == nullptr) {
3022 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 3008 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3023 return; 3009 return;
3024 } 3010 }
3025 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { 3011 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3026 // TODO(sehr,stichnot): This could be done with a single unconditional 3012 // TODO(sehr,stichnot): This could be done with a single unconditional
3027 // branch instruction, but subzero doesn't know how to handle the resulting 3013 // branch instruction, but subzero doesn't know how to handle the resulting
3028 // control flow graph changes now. Make it do so to eliminate mov and cmp. 3014 // control flow graph changes now. Make it do so to eliminate mov and cmp.
3029 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 3015 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
3030 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); 3016 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
3031 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3017 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3032 return; 3018 return;
3033 } 3019 }
3034 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { 3020 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3035 Operand *Src = nullptr; 3021 Operand *Src = nullptr;
3036 if (IcmpResult) { 3022 if (IcmpResult) {
3037 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); 3023 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3038 } else { 3024 } else {
3039 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); 3025 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3040 } 3026 }
3041 Variable *SelectDest = Select->getDest(); 3027 Variable *SelectDest = Select->getDest();
3042 lowerMove(SelectDest, Src, false); 3028 lowerMove(SelectDest, Src, false);
3043 return; 3029 return;
3044 } 3030 }
3045 llvm::report_fatal_error("Unexpected consumer type"); 3031 llvm::report_fatal_error("Unexpected consumer type");
3046 } 3032 }
3047 3033
3048 template <class Machine> 3034 template <typename TraitsType>
3049 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith, 3035 void TargetX86Base<TraitsType>::lowerArithAndConsumer(
3050 const Inst *Consumer) { 3036 const InstArithmetic *Arith, const Inst *Consumer) {
3051 Variable *T = nullptr; 3037 Variable *T = nullptr;
3052 Operand *Src0 = legalize(Arith->getSrc(0)); 3038 Operand *Src0 = legalize(Arith->getSrc(0));
3053 Operand *Src1 = legalize(Arith->getSrc(1)); 3039 Operand *Src1 = legalize(Arith->getSrc(1));
3054 Variable *Dest = Arith->getDest(); 3040 Variable *Dest = Arith->getDest();
3055 switch (Arith->getOp()) { 3041 switch (Arith->getOp()) {
3056 default: 3042 default:
3057 llvm_unreachable("arithmetic operator not AND or OR"); 3043 llvm_unreachable("arithmetic operator not AND or OR");
3058 break; 3044 break;
3059 case InstArithmetic::And: 3045 case InstArithmetic::And:
3060 _mov(T, Src0); 3046 _mov(T, Src0);
(...skipping 16 matching lines...) Expand all
3077 } 3063 }
3078 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { 3064 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3079 Context.insert<InstFakeUse>(T); 3065 Context.insert<InstFakeUse>(T);
3080 Context.insert<InstFakeDef>(Dest); 3066 Context.insert<InstFakeDef>(Dest);
3081 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3067 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3082 return; 3068 return;
3083 } 3069 }
3084 llvm::report_fatal_error("Unexpected consumer type"); 3070 llvm::report_fatal_error("Unexpected consumer type");
3085 } 3071 }
3086 3072
3087 template <class Machine> 3073 template <typename TraitsType>
3088 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 3074 void TargetX86Base<TraitsType>::lowerInsertElement(
3075 const InstInsertElement *Inst) {
3089 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3076 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3090 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3077 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
3091 ConstantInteger32 *ElementIndex = 3078 ConstantInteger32 *ElementIndex =
3092 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3079 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
3093 // Only constant indices are allowed in PNaCl IR. 3080 // Only constant indices are allowed in PNaCl IR.
3094 assert(ElementIndex); 3081 assert(ElementIndex);
3095 unsigned Index = ElementIndex->getValue(); 3082 unsigned Index = ElementIndex->getValue();
3096 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 3083 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3097 3084
3098 Type Ty = SourceVectNotLegalized->getType(); 3085 Type Ty = SourceVectNotLegalized->getType();
(...skipping 17 matching lines...) Expand all
3116 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3103 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3117 Operand *SourceVectRM = 3104 Operand *SourceVectRM =
3118 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3105 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3119 Variable *T = makeReg(Ty); 3106 Variable *T = makeReg(Ty);
3120 _movp(T, SourceVectRM); 3107 _movp(T, SourceVectRM);
3121 if (Ty == IceType_v4f32) { 3108 if (Ty == IceType_v4f32) {
3122 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3109 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3123 } else { 3110 } else {
3124 // For the pinsrb and pinsrw instructions, when the source operand is a 3111 // For the pinsrb and pinsrw instructions, when the source operand is a
3125 // register, it must be a full r32 register like eax, and not ax/al/ah. 3112 // register, it must be a full r32 register like eax, and not ax/al/ah.
3126 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use 3113 // For filetype=asm, InstX86Pinsr<TraitsType>::emit() compensates for
3114 // the use
3127 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() 3115 // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
3128 // validates that the original and base register encodings are the same. 3116 // validates that the original and base register encodings are the same.
3129 if (ElementRM->getType() == IceType_i8 && 3117 if (ElementRM->getType() == IceType_i8 &&
3130 llvm::isa<Variable>(ElementRM)) { 3118 llvm::isa<Variable>(ElementRM)) {
3131 // Don't use ah/bh/ch/dh for pinsrb. 3119 // Don't use ah/bh/ch/dh for pinsrb.
3132 ElementRM = copyToReg8(ElementRM); 3120 ElementRM = copyToReg8(ElementRM);
3133 } 3121 }
3134 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3122 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
3135 } 3123 }
3136 _movp(Inst->getDest(), T); 3124 _movp(Inst->getDest(), T);
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
3201 // Spill the value to a stack slot and perform the insertion in memory. 3189 // Spill the value to a stack slot and perform the insertion in memory.
3202 // 3190 //
3203 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support 3191 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
3204 // for legalizing to mem is implemented. 3192 // for legalizing to mem is implemented.
3205 Variable *Slot = Func->makeVariable(Ty); 3193 Variable *Slot = Func->makeVariable(Ty);
3206 Slot->setMustNotHaveReg(); 3194 Slot->setMustNotHaveReg();
3207 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); 3195 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
3208 3196
3209 // Compute the location of the position to insert in memory. 3197 // Compute the location of the position to insert in memory.
3210 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 3198 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
3211 typename Traits::X86OperandMem *Loc = 3199 X86OperandMem *Loc =
3212 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3200 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3213 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); 3201 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
3214 3202
3215 Variable *T = makeReg(Ty); 3203 Variable *T = makeReg(Ty);
3216 _movp(T, Slot); 3204 _movp(T, Slot);
3217 _movp(Inst->getDest(), T); 3205 _movp(Inst->getDest(), T);
3218 } 3206 }
3219 } 3207 }
3220 3208
3221 template <class Machine> 3209 template <typename TraitsType>
3222 void TargetX86Base<Machine>::lowerIntrinsicCall( 3210 void TargetX86Base<TraitsType>::lowerIntrinsicCall(
3223 const InstIntrinsicCall *Instr) { 3211 const InstIntrinsicCall *Instr) {
3224 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { 3212 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3225 case Intrinsics::AtomicCmpxchg: { 3213 case Intrinsics::AtomicCmpxchg: {
3226 if (!Intrinsics::isMemoryOrderValid( 3214 if (!Intrinsics::isMemoryOrderValid(
3227 ID, getConstantMemoryOrder(Instr->getArg(3)), 3215 ID, getConstantMemoryOrder(Instr->getArg(3)),
3228 getConstantMemoryOrder(Instr->getArg(4)))) { 3216 getConstantMemoryOrder(Instr->getArg(4)))) {
3229 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 3217 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3230 return; 3218 return;
3231 } 3219 }
3232 Variable *DestPrev = Instr->getDest(); 3220 Variable *DestPrev = Instr->getDest();
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
3292 } 3280 }
3293 Variable *Dest = Instr->getDest(); 3281 Variable *Dest = Instr->getDest();
3294 if (!Traits::Is64Bit) { 3282 if (!Traits::Is64Bit) {
3295 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { 3283 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) {
3296 // Follow what GCC does and use a movq instead of what lowerLoad() 3284 // Follow what GCC does and use a movq instead of what lowerLoad()
3297 // normally does (split the load into two). Thus, this skips 3285 // normally does (split the load into two). Thus, this skips
3298 // load/arithmetic op folding. Load/arithmetic folding can't happen 3286 // load/arithmetic op folding. Load/arithmetic folding can't happen
3299 // anyway, since this is x86-32 and integer arithmetic only happens on 3287 // anyway, since this is x86-32 and integer arithmetic only happens on
3300 // 32-bit quantities. 3288 // 32-bit quantities.
3301 Variable *T = makeReg(IceType_f64); 3289 Variable *T = makeReg(IceType_f64);
3302 typename Traits::X86OperandMem *Addr = 3290 X86OperandMem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
3303 formMemoryOperand(Instr->getArg(0), IceType_f64);
3304 _movq(T, Addr); 3291 _movq(T, Addr);
3305 // Then cast the bits back out of the XMM register to the i64 Dest. 3292 // Then cast the bits back out of the XMM register to the i64 Dest.
3306 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 3293 auto *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3307 lowerCast(Cast); 3294 lowerCast(Cast);
3308 // Make sure that the atomic load isn't elided when unused. 3295 // Make sure that the atomic load isn't elided when unused.
3309 Context.insert<InstFakeUse>(Dest64On32->getLo()); 3296 Context.insert<InstFakeUse>(Dest64On32->getLo());
3310 Context.insert<InstFakeUse>(Dest64On32->getHi()); 3297 Context.insert<InstFakeUse>(Dest64On32->getHi());
3311 return; 3298 return;
3312 } 3299 }
3313 } 3300 }
(...skipping 29 matching lines...) Expand all
3343 Operand *Value = Instr->getArg(0); 3330 Operand *Value = Instr->getArg(0);
3344 Operand *Ptr = Instr->getArg(1); 3331 Operand *Ptr = Instr->getArg(1);
3345 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { 3332 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
3346 // Use a movq instead of what lowerStore() normally does (split the store 3333 // Use a movq instead of what lowerStore() normally does (split the store
3347 // into two), following what GCC does. Cast the bits from int -> to an 3334 // into two), following what GCC does. Cast the bits from int -> to an
3348 // xmm register first. 3335 // xmm register first.
3349 Variable *T = makeReg(IceType_f64); 3336 Variable *T = makeReg(IceType_f64);
3350 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 3337 auto *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3351 lowerCast(Cast); 3338 lowerCast(Cast);
3352 // Then store XMM w/ a movq. 3339 // Then store XMM w/ a movq.
3353 typename Traits::X86OperandMem *Addr = 3340 X86OperandMem *Addr = formMemoryOperand(Ptr, IceType_f64);
3354 formMemoryOperand(Ptr, IceType_f64);
3355 _storeq(T, Addr); 3341 _storeq(T, Addr);
3356 _mfence(); 3342 _mfence();
3357 return; 3343 return;
3358 } 3344 }
3359 auto *Store = InstStore::create(Func, Value, Ptr); 3345 auto *Store = InstStore::create(Func, Value, Ptr);
3360 lowerStore(Store); 3346 lowerStore(Store);
3361 _mfence(); 3347 _mfence();
3362 return; 3348 return;
3363 } 3349 }
3364 case Intrinsics::Bswap: { 3350 case Intrinsics::Bswap: {
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
3488 return; 3474 return;
3489 } 3475 }
3490 case Intrinsics::Fabs: { 3476 case Intrinsics::Fabs: {
3491 Operand *Src = legalize(Instr->getArg(0)); 3477 Operand *Src = legalize(Instr->getArg(0));
3492 Type Ty = Src->getType(); 3478 Type Ty = Src->getType();
3493 Variable *Dest = Instr->getDest(); 3479 Variable *Dest = Instr->getDest();
3494 Variable *T = makeVectorOfFabsMask(Ty); 3480 Variable *T = makeVectorOfFabsMask(Ty);
3495 // The pand instruction operates on an m128 memory operand, so if Src is an 3481 // The pand instruction operates on an m128 memory operand, so if Src is an
3496 // f32 or f64, we need to make sure it's in a register. 3482 // f32 or f64, we need to make sure it's in a register.
3497 if (isVectorType(Ty)) { 3483 if (isVectorType(Ty)) {
3498 if (llvm::isa<typename Traits::X86OperandMem>(Src)) 3484 if (llvm::isa<X86OperandMem>(Src))
3499 Src = legalizeToReg(Src); 3485 Src = legalizeToReg(Src);
3500 } else { 3486 } else {
3501 Src = legalizeToReg(Src); 3487 Src = legalizeToReg(Src);
3502 } 3488 }
3503 _pand(T, Src); 3489 _pand(T, Src);
3504 if (isVectorType(Ty)) 3490 if (isVectorType(Ty))
3505 _movp(Dest, T); 3491 _movp(Dest, T);
3506 else 3492 else
3507 _mov(Dest, T); 3493 _mov(Dest, T);
3508 return; 3494 return;
(...skipping 12 matching lines...) Expand all
3521 case Intrinsics::Memmove: { 3507 case Intrinsics::Memmove: {
3522 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); 3508 lowerMemmove(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
3523 return; 3509 return;
3524 } 3510 }
3525 case Intrinsics::Memset: { 3511 case Intrinsics::Memset: {
3526 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); 3512 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
3527 return; 3513 return;
3528 } 3514 }
3529 case Intrinsics::NaClReadTP: { 3515 case Intrinsics::NaClReadTP: {
3530 if (Ctx->getFlags().getUseSandboxing()) { 3516 if (Ctx->getFlags().getUseSandboxing()) {
3531 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); 3517 Operand *Src =
3518 dispatchToConcrete(&ConcreteTarget::createNaClReadTPSrcOperand);
3532 Variable *Dest = Instr->getDest(); 3519 Variable *Dest = Instr->getDest();
3533 Variable *T = nullptr; 3520 Variable *T = nullptr;
3534 _mov(T, Src); 3521 _mov(T, Src);
3535 _mov(Dest, T); 3522 _mov(Dest, T);
3536 } else { 3523 } else {
3537 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 3524 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3538 lowerCall(Call); 3525 lowerCall(Call);
3539 } 3526 }
3540 return; 3527 return;
3541 } 3528 }
(...skipping 29 matching lines...) Expand all
3571 case Intrinsics::Trap: 3558 case Intrinsics::Trap:
3572 _ud2(); 3559 _ud2();
3573 return; 3560 return;
3574 case Intrinsics::UnknownIntrinsic: 3561 case Intrinsics::UnknownIntrinsic:
3575 Func->setError("Should not be lowering UnknownIntrinsic"); 3562 Func->setError("Should not be lowering UnknownIntrinsic");
3576 return; 3563 return;
3577 } 3564 }
3578 return; 3565 return;
3579 } 3566 }
3580 3567
3581 template <class Machine> 3568 template <typename TraitsType>
3582 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3569 void TargetX86Base<TraitsType>::lowerAtomicCmpxchg(Variable *DestPrev,
3583 Operand *Ptr, Operand *Expected, 3570 Operand *Ptr,
3584 Operand *Desired) { 3571 Operand *Expected,
3572 Operand *Desired) {
3585 Type Ty = Expected->getType(); 3573 Type Ty = Expected->getType();
3586 if (!Traits::Is64Bit && Ty == IceType_i64) { 3574 if (!Traits::Is64Bit && Ty == IceType_i64) {
3587 // Reserve the pre-colored registers first, before adding any more 3575 // Reserve the pre-colored registers first, before adding any more
3588 // infinite-weight variables from formMemoryOperand's legalization. 3576 // infinite-weight variables from formMemoryOperand's legalization.
3589 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3577 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3590 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3578 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3591 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3579 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3592 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3580 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3593 _mov(T_eax, loOperand(Expected)); 3581 _mov(T_eax, loOperand(Expected));
3594 _mov(T_edx, hiOperand(Expected)); 3582 _mov(T_edx, hiOperand(Expected));
3595 _mov(T_ebx, loOperand(Desired)); 3583 _mov(T_ebx, loOperand(Desired));
3596 _mov(T_ecx, hiOperand(Desired)); 3584 _mov(T_ecx, hiOperand(Desired));
3597 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3585 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3598 constexpr bool Locked = true; 3586 constexpr bool Locked = true;
3599 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3587 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3600 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3588 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3601 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3589 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3602 _mov(DestLo, T_eax); 3590 _mov(DestLo, T_eax);
3603 _mov(DestHi, T_edx); 3591 _mov(DestHi, T_edx);
3604 return; 3592 return;
3605 } 3593 }
3606 int32_t Eax; 3594 int32_t Eax;
3607 switch (Ty) { 3595 switch (Ty) {
3608 default: 3596 default:
3609 llvm::report_fatal_error("Bad type for cmpxchg"); 3597 llvm::report_fatal_error("Bad type for cmpxchg");
3610 case IceType_i64: 3598 case IceType_i64:
3611 Eax = Traits::getRaxOrDie(); 3599 Eax = Traits::getRaxOrDie();
3612 break; 3600 break;
3613 case IceType_i32: 3601 case IceType_i32:
3614 Eax = Traits::RegisterSet::Reg_eax; 3602 Eax = Traits::RegisterSet::Reg_eax;
3615 break; 3603 break;
3616 case IceType_i16: 3604 case IceType_i16:
3617 Eax = Traits::RegisterSet::Reg_ax; 3605 Eax = Traits::RegisterSet::Reg_ax;
3618 break; 3606 break;
3619 case IceType_i8: 3607 case IceType_i8:
3620 Eax = Traits::RegisterSet::Reg_al; 3608 Eax = Traits::RegisterSet::Reg_al;
3621 break; 3609 break;
3622 } 3610 }
3623 Variable *T_eax = makeReg(Ty, Eax); 3611 Variable *T_eax = makeReg(Ty, Eax);
3624 _mov(T_eax, Expected); 3612 _mov(T_eax, Expected);
3625 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3613 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3626 Variable *DesiredReg = legalizeToReg(Desired); 3614 Variable *DesiredReg = legalizeToReg(Desired);
3627 constexpr bool Locked = true; 3615 constexpr bool Locked = true;
3628 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3616 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3629 _mov(DestPrev, T_eax); 3617 _mov(DestPrev, T_eax);
3630 } 3618 }
3631 3619
3632 template <class Machine> 3620 template <typename TraitsType>
3633 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3621 bool TargetX86Base<TraitsType>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3634 Operand *PtrToMem, 3622 Operand *PtrToMem,
3635 Operand *Expected, 3623 Operand *Expected,
3636 Operand *Desired) { 3624 Operand *Desired) {
3637 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3625 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3638 return false; 3626 return false;
3639 // Peek ahead a few instructions and see how Dest is used. 3627 // Peek ahead a few instructions and see how Dest is used.
3640 // It's very common to have: 3628 // It's very common to have:
3641 // 3629 //
3642 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) 3630 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3643 // [%y_phi = ...] // list of phi stores 3631 // [%y_phi = ...] // list of phi stores
3644 // %p = icmp eq i32 %x, %expected 3632 // %p = icmp eq i32 %x, %expected
3645 // br i1 %p, label %l1, label %l2 3633 // br i1 %p, label %l1, label %l2
3646 // 3634 //
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
3698 NextBr->setDeleted(); 3686 NextBr->setDeleted();
3699 Context.advanceNext(); 3687 Context.advanceNext();
3700 Context.advanceNext(); 3688 Context.advanceNext();
3701 return true; 3689 return true;
3702 } 3690 }
3703 } 3691 }
3704 } 3692 }
3705 return false; 3693 return false;
3706 } 3694 }
3707 3695
3708 template <class Machine> 3696 template <typename TraitsType>
3709 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3697 void TargetX86Base<TraitsType>::lowerAtomicRMW(Variable *Dest,
3710 Operand *Ptr, Operand *Val) { 3698 uint32_t Operation, Operand *Ptr,
3699 Operand *Val) {
3711 bool NeedsCmpxchg = false; 3700 bool NeedsCmpxchg = false;
3712 LowerBinOp Op_Lo = nullptr; 3701 LowerBinOp Op_Lo = nullptr;
3713 LowerBinOp Op_Hi = nullptr; 3702 LowerBinOp Op_Hi = nullptr;
3714 switch (Operation) { 3703 switch (Operation) {
3715 default: 3704 default:
3716 Func->setError("Unknown AtomicRMW operation"); 3705 Func->setError("Unknown AtomicRMW operation");
3717 return; 3706 return;
3718 case Intrinsics::AtomicAdd: { 3707 case Intrinsics::AtomicAdd: {
3719 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3708 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3720 // All the fall-through paths must set this to true, but use this 3709 // All the fall-through paths must set this to true, but use this
3721 // for asserting. 3710 // for asserting.
3722 NeedsCmpxchg = true; 3711 NeedsCmpxchg = true;
3723 Op_Lo = &TargetX86Base<Machine>::_add; 3712 Op_Lo = &TargetX86Base<TraitsType>::_add;
3724 Op_Hi = &TargetX86Base<Machine>::_adc; 3713 Op_Hi = &TargetX86Base<TraitsType>::_adc;
3725 break; 3714 break;
3726 } 3715 }
3727 typename Traits::X86OperandMem *Addr = 3716 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
3728 formMemoryOperand(Ptr, Dest->getType());
3729 constexpr bool Locked = true; 3717 constexpr bool Locked = true;
3730 Variable *T = nullptr; 3718 Variable *T = nullptr;
3731 _mov(T, Val); 3719 _mov(T, Val);
3732 _xadd(Addr, T, Locked); 3720 _xadd(Addr, T, Locked);
3733 _mov(Dest, T); 3721 _mov(Dest, T);
3734 return; 3722 return;
3735 } 3723 }
3736 case Intrinsics::AtomicSub: { 3724 case Intrinsics::AtomicSub: {
3737 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3725 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3738 NeedsCmpxchg = true; 3726 NeedsCmpxchg = true;
3739 Op_Lo = &TargetX86Base<Machine>::_sub; 3727 Op_Lo = &TargetX86Base<TraitsType>::_sub;
3740 Op_Hi = &TargetX86Base<Machine>::_sbb; 3728 Op_Hi = &TargetX86Base<TraitsType>::_sbb;
3741 break; 3729 break;
3742 } 3730 }
3743 typename Traits::X86OperandMem *Addr = 3731 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
3744 formMemoryOperand(Ptr, Dest->getType());
3745 constexpr bool Locked = true; 3732 constexpr bool Locked = true;
3746 Variable *T = nullptr; 3733 Variable *T = nullptr;
3747 _mov(T, Val); 3734 _mov(T, Val);
3748 _neg(T); 3735 _neg(T);
3749 _xadd(Addr, T, Locked); 3736 _xadd(Addr, T, Locked);
3750 _mov(Dest, T); 3737 _mov(Dest, T);
3751 return; 3738 return;
3752 } 3739 }
3753 case Intrinsics::AtomicOr: 3740 case Intrinsics::AtomicOr:
3754 // TODO(jvoung): If Dest is null or dead, then some of these 3741 // TODO(jvoung): If Dest is null or dead, then some of these
3755 // operations do not need an "exchange", but just a locked op. 3742 // operations do not need an "exchange", but just a locked op.
3756 // That appears to be "worth" it for sub, or, and, and xor. 3743 // That appears to be "worth" it for sub, or, and, and xor.
3757 // xadd is probably fine vs lock add for add, and xchg is fine 3744 // xadd is probably fine vs lock add for add, and xchg is fine
3758 // vs an atomic store. 3745 // vs an atomic store.
3759 NeedsCmpxchg = true; 3746 NeedsCmpxchg = true;
3760 Op_Lo = &TargetX86Base<Machine>::_or; 3747 Op_Lo = &TargetX86Base<TraitsType>::_or;
3761 Op_Hi = &TargetX86Base<Machine>::_or; 3748 Op_Hi = &TargetX86Base<TraitsType>::_or;
3762 break; 3749 break;
3763 case Intrinsics::AtomicAnd: 3750 case Intrinsics::AtomicAnd:
3764 NeedsCmpxchg = true; 3751 NeedsCmpxchg = true;
3765 Op_Lo = &TargetX86Base<Machine>::_and; 3752 Op_Lo = &TargetX86Base<TraitsType>::_and;
3766 Op_Hi = &TargetX86Base<Machine>::_and; 3753 Op_Hi = &TargetX86Base<TraitsType>::_and;
3767 break; 3754 break;
3768 case Intrinsics::AtomicXor: 3755 case Intrinsics::AtomicXor:
3769 NeedsCmpxchg = true; 3756 NeedsCmpxchg = true;
3770 Op_Lo = &TargetX86Base<Machine>::_xor; 3757 Op_Lo = &TargetX86Base<TraitsType>::_xor;
3771 Op_Hi = &TargetX86Base<Machine>::_xor; 3758 Op_Hi = &TargetX86Base<TraitsType>::_xor;
3772 break; 3759 break;
3773 case Intrinsics::AtomicExchange: 3760 case Intrinsics::AtomicExchange:
3774 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3761 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3775 NeedsCmpxchg = true; 3762 NeedsCmpxchg = true;
3776 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3763 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3777 // just need to be moved to the ecx and ebx registers. 3764 // just need to be moved to the ecx and ebx registers.
3778 Op_Lo = nullptr; 3765 Op_Lo = nullptr;
3779 Op_Hi = nullptr; 3766 Op_Hi = nullptr;
3780 break; 3767 break;
3781 } 3768 }
3782 typename Traits::X86OperandMem *Addr = 3769 X86OperandMem *Addr = formMemoryOperand(Ptr, Dest->getType());
3783 formMemoryOperand(Ptr, Dest->getType());
3784 Variable *T = nullptr; 3770 Variable *T = nullptr;
3785 _mov(T, Val); 3771 _mov(T, Val);
3786 _xchg(Addr, T); 3772 _xchg(Addr, T);
3787 _mov(Dest, T); 3773 _mov(Dest, T);
3788 return; 3774 return;
3789 } 3775 }
3790 // Otherwise, we need a cmpxchg loop. 3776 // Otherwise, we need a cmpxchg loop.
3791 (void)NeedsCmpxchg; 3777 (void)NeedsCmpxchg;
3792 assert(NeedsCmpxchg); 3778 assert(NeedsCmpxchg);
3793 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); 3779 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3794 } 3780 }
3795 3781
3796 template <class Machine> 3782 template <typename TraitsType>
3797 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, 3783 void TargetX86Base<TraitsType>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3798 LowerBinOp Op_Hi, 3784 LowerBinOp Op_Hi,
3799 Variable *Dest, 3785 Variable *Dest,
3800 Operand *Ptr, 3786 Operand *Ptr,
3801 Operand *Val) { 3787 Operand *Val) {
3802 // Expand a more complex RMW operation as a cmpxchg loop: 3788 // Expand a more complex RMW operation as a cmpxchg loop:
3803 // For 64-bit: 3789 // For 64-bit:
3804 // mov eax, [ptr] 3790 // mov eax, [ptr]
3805 // mov edx, [ptr + 4] 3791 // mov edx, [ptr + 4]
3806 // .LABEL: 3792 // .LABEL:
3807 // mov ebx, eax 3793 // mov ebx, eax
3808 // <Op_Lo> ebx, <desired_adj_lo> 3794 // <Op_Lo> ebx, <desired_adj_lo>
3809 // mov ecx, edx 3795 // mov ecx, edx
3810 // <Op_Hi> ecx, <desired_adj_hi> 3796 // <Op_Hi> ecx, <desired_adj_hi>
3811 // lock cmpxchg8b [ptr] 3797 // lock cmpxchg8b [ptr]
3812 // jne .LABEL 3798 // jne .LABEL
3813 // mov <dest_lo>, eax 3799 // mov <dest_lo>, eax
3814 // mov <dest_lo>, edx 3800 // mov <dest_lo>, edx
3815 // 3801 //
3816 // For 32-bit: 3802 // For 32-bit:
3817 // mov eax, [ptr] 3803 // mov eax, [ptr]
3818 // .LABEL: 3804 // .LABEL:
3819 // mov <reg>, eax 3805 // mov <reg>, eax
3820 // op <reg>, [desired_adj] 3806 // op <reg>, [desired_adj]
3821 // lock cmpxchg [ptr], <reg> 3807 // lock cmpxchg [ptr], <reg>
3822 // jne .LABEL 3808 // jne .LABEL
3823 // mov <dest>, eax 3809 // mov <dest>, eax
3824 // 3810 //
3825 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3811 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3826 Val = legalize(Val); 3812 Val = legalize(Val);
3827 Type Ty = Val->getType(); 3813 Type Ty = Val->getType();
3828 if (!Traits::Is64Bit && Ty == IceType_i64) { 3814 if (!Traits::Is64Bit && Ty == IceType_i64) {
3829 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3815 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3830 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3816 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3831 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3817 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3832 _mov(T_eax, loOperand(Addr)); 3818 _mov(T_eax, loOperand(Addr));
3833 _mov(T_edx, hiOperand(Addr)); 3819 _mov(T_edx, hiOperand(Addr));
3834 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3820 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3835 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3821 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3836 typename Traits::Insts::Label *Label = 3822 InstX86Label *Label = InstX86Label::create(Func, this);
3837 Traits::Insts::Label::create(Func, this);
3838 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3823 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3839 if (!IsXchg8b) { 3824 if (!IsXchg8b) {
3840 Context.insert(Label); 3825 Context.insert(Label);
3841 _mov(T_ebx, T_eax); 3826 _mov(T_ebx, T_eax);
3842 (this->*Op_Lo)(T_ebx, loOperand(Val)); 3827 (this->*Op_Lo)(T_ebx, loOperand(Val));
3843 _mov(T_ecx, T_edx); 3828 _mov(T_ecx, T_edx);
3844 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3829 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3845 } else { 3830 } else {
3846 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3831 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3847 // It just needs the Val loaded into ebx and ecx. 3832 // It just needs the Val loaded into ebx and ecx.
(...skipping 21 matching lines...) Expand all
3869 } 3854 }
3870 // The address base (if any) is also reused in the loop. 3855 // The address base (if any) is also reused in the loop.
3871 if (Variable *Base = Addr->getBase()) 3856 if (Variable *Base = Addr->getBase())
3872 Context.insert<InstFakeUse>(Base); 3857 Context.insert<InstFakeUse>(Base);
3873 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3858 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3874 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3859 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3875 _mov(DestLo, T_eax); 3860 _mov(DestLo, T_eax);
3876 _mov(DestHi, T_edx); 3861 _mov(DestHi, T_edx);
3877 return; 3862 return;
3878 } 3863 }
3879 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3864 X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3880 int32_t Eax; 3865 int32_t Eax;
3881 switch (Ty) { 3866 switch (Ty) {
3882 default: 3867 default:
3883 llvm::report_fatal_error("Bad type for atomicRMW"); 3868 llvm::report_fatal_error("Bad type for atomicRMW");
3884 case IceType_i64: 3869 case IceType_i64:
3885 Eax = Traits::getRaxOrDie(); 3870 Eax = Traits::getRaxOrDie();
3886 break; 3871 break;
3887 case IceType_i32: 3872 case IceType_i32:
3888 Eax = Traits::RegisterSet::Reg_eax; 3873 Eax = Traits::RegisterSet::Reg_eax;
3889 break; 3874 break;
3890 case IceType_i16: 3875 case IceType_i16:
3891 Eax = Traits::RegisterSet::Reg_ax; 3876 Eax = Traits::RegisterSet::Reg_ax;
3892 break; 3877 break;
3893 case IceType_i8: 3878 case IceType_i8:
3894 Eax = Traits::RegisterSet::Reg_al; 3879 Eax = Traits::RegisterSet::Reg_al;
3895 break; 3880 break;
3896 } 3881 }
3897 Variable *T_eax = makeReg(Ty, Eax); 3882 Variable *T_eax = makeReg(Ty, Eax);
3898 _mov(T_eax, Addr); 3883 _mov(T_eax, Addr);
3899 auto *Label = Context.insert<typename Traits::Insts::Label>(this); 3884 auto *Label = Context.insert<InstX86Label>(this);
3900 // We want to pick a different register for T than Eax, so don't use 3885 // We want to pick a different register for T than Eax, so don't use
3901 // _mov(T == nullptr, T_eax). 3886 // _mov(T == nullptr, T_eax).
3902 Variable *T = makeReg(Ty); 3887 Variable *T = makeReg(Ty);
3903 _mov(T, T_eax); 3888 _mov(T, T_eax);
3904 (this->*Op_Lo)(T, Val); 3889 (this->*Op_Lo)(T, Val);
3905 constexpr bool Locked = true; 3890 constexpr bool Locked = true;
3906 _cmpxchg(Addr, T_eax, T, Locked); 3891 _cmpxchg(Addr, T_eax, T, Locked);
3907 _br(Traits::Cond::Br_ne, Label); 3892 _br(Traits::Cond::Br_ne, Label);
3908 // If Val is a variable, model the extended live range of Val through 3893 // If Val is a variable, model the extended live range of Val through
3909 // the end of the loop, since it will be re-used by the loop. 3894 // the end of the loop, since it will be re-used by the loop.
3910 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) { 3895 if (auto *ValVar = llvm::dyn_cast<Variable>(Val)) {
3911 Context.insert<InstFakeUse>(ValVar); 3896 Context.insert<InstFakeUse>(ValVar);
3912 } 3897 }
3913 // The address base (if any) is also reused in the loop. 3898 // The address base (if any) is also reused in the loop.
3914 if (Variable *Base = Addr->getBase()) 3899 if (Variable *Base = Addr->getBase())
3915 Context.insert<InstFakeUse>(Base); 3900 Context.insert<InstFakeUse>(Base);
3916 _mov(Dest, T_eax); 3901 _mov(Dest, T_eax);
3917 } 3902 }
3918 3903
3919 /// Lowers count {trailing, leading} zeros intrinsic. 3904 /// Lowers count {trailing, leading} zeros intrinsic.
3920 /// 3905 ///
3921 /// We could do constant folding here, but that should have 3906 /// We could do constant folding here, but that should have
3922 /// been done by the front-end/middle-end optimizations. 3907 /// been done by the front-end/middle-end optimizations.
3923 template <class Machine> 3908 template <typename TraitsType>
3924 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, 3909 void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty,
3925 Operand *FirstVal, 3910 Variable *Dest,
3926 Operand *SecondVal) { 3911 Operand *FirstVal,
3912 Operand *SecondVal) {
3927 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). 3913 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3928 // Then the instructions will handle the Val == 0 case much more simply 3914 // Then the instructions will handle the Val == 0 case much more simply
3929 // and won't require conversion from bit position to number of zeros. 3915 // and won't require conversion from bit position to number of zeros.
3930 // 3916 //
3931 // Otherwise: 3917 // Otherwise:
3932 // bsr IF_NOT_ZERO, Val 3918 // bsr IF_NOT_ZERO, Val
3933 // mov T_DEST, 63 3919 // mov T_DEST, 63
3934 // cmovne T_DEST, IF_NOT_ZERO 3920 // cmovne T_DEST, IF_NOT_ZERO
3935 // xor T_DEST, 31 3921 // xor T_DEST, 31
3936 // mov DEST, T_DEST 3922 // mov DEST, T_DEST
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
3988 } else { 3974 } else {
3989 _bsr(T_Dest2, SecondVar); 3975 _bsr(T_Dest2, SecondVar);
3990 _xor(T_Dest2, _31); 3976 _xor(T_Dest2, _31);
3991 } 3977 }
3992 _test(SecondVar, SecondVar); 3978 _test(SecondVar, SecondVar);
3993 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); 3979 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
3994 _mov(DestLo, T_Dest2); 3980 _mov(DestLo, T_Dest2);
3995 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 3981 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3996 } 3982 }
3997 3983
3998 template <class Machine> 3984 template <typename TraitsType>
3999 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, 3985 void TargetX86Base<TraitsType>::typedLoad(Type Ty, Variable *Dest,
4000 Constant *Offset) { 3986 Variable *Base, Constant *Offset) {
4001 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 3987 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
4002 3988
4003 if (isVectorType(Ty)) 3989 if (isVectorType(Ty))
4004 _movp(Dest, Mem); 3990 _movp(Dest, Mem);
4005 else if (Ty == IceType_f64) 3991 else if (Ty == IceType_f64)
4006 _movq(Dest, Mem); 3992 _movq(Dest, Mem);
4007 else 3993 else
4008 _mov(Dest, Mem); 3994 _mov(Dest, Mem);
4009 } 3995 }
4010 3996
4011 template <class Machine> 3997 template <typename TraitsType>
4012 void TargetX86Base<Machine>::typedStore(Type Ty, Variable *Value, 3998 void TargetX86Base<TraitsType>::typedStore(Type Ty, Variable *Value,
4013 Variable *Base, Constant *Offset) { 3999 Variable *Base, Constant *Offset) {
4014 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 4000 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
4015 4001
4016 if (isVectorType(Ty)) 4002 if (isVectorType(Ty))
4017 _storep(Value, Mem); 4003 _storep(Value, Mem);
4018 else if (Ty == IceType_f64) 4004 else if (Ty == IceType_f64)
4019 _storeq(Value, Mem); 4005 _storeq(Value, Mem);
4020 else 4006 else
4021 _store(Value, Mem); 4007 _store(Value, Mem);
4022 } 4008 }
4023 4009
4024 template <class Machine> 4010 template <typename TraitsType>
4025 void TargetX86Base<Machine>::copyMemory(Type Ty, Variable *Dest, Variable *Src, 4011 void TargetX86Base<TraitsType>::copyMemory(Type Ty, Variable *Dest,
4026 int32_t OffsetAmt) { 4012 Variable *Src, int32_t OffsetAmt) {
4027 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; 4013 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
4028 // TODO(ascull): this or add nullptr test to _movp, _movq 4014 // TODO(ascull): this or add nullptr test to _movp, _movq
4029 Variable *Data = makeReg(Ty); 4015 Variable *Data = makeReg(Ty);
4030 4016
4031 typedLoad(Ty, Data, Src, Offset); 4017 typedLoad(Ty, Data, Src, Offset);
4032 typedStore(Ty, Data, Dest, Offset); 4018 typedStore(Ty, Data, Dest, Offset);
4033 } 4019 }
4034 4020
4035 template <class Machine> 4021 template <typename TraitsType>
4036 void TargetX86Base<Machine>::lowerMemcpy(Operand *Dest, Operand *Src, 4022 void TargetX86Base<TraitsType>::lowerMemcpy(Operand *Dest, Operand *Src,
4037 Operand *Count) { 4023 Operand *Count) {
4038 // There is a load and store for each chunk in the unroll 4024 // There is a load and store for each chunk in the unroll
4039 constexpr uint32_t BytesPerStorep = 16; 4025 constexpr uint32_t BytesPerStorep = 16;
4040 4026
4041 // Check if the operands are constants 4027 // Check if the operands are constants
4042 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); 4028 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4043 const bool IsCountConst = CountConst != nullptr; 4029 const bool IsCountConst = CountConst != nullptr;
4044 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; 4030 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4045 4031
4046 if (shouldOptimizeMemIntrins() && IsCountConst && 4032 if (shouldOptimizeMemIntrins() && IsCountConst &&
4047 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) { 4033 CountValue <= BytesPerStorep * Traits::MEMCPY_UNROLL_LIMIT) {
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
4079 } 4065 }
4080 4066
4081 // Fall back on a function call 4067 // Fall back on a function call
4082 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); 4068 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
4083 Call->addArg(Dest); 4069 Call->addArg(Dest);
4084 Call->addArg(Src); 4070 Call->addArg(Src);
4085 Call->addArg(Count); 4071 Call->addArg(Count);
4086 lowerCall(Call); 4072 lowerCall(Call);
4087 } 4073 }
4088 4074
4089 template <class Machine> 4075 template <typename TraitsType>
4090 void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src, 4076 void TargetX86Base<TraitsType>::lowerMemmove(Operand *Dest, Operand *Src,
4091 Operand *Count) { 4077 Operand *Count) {
4092 // There is a load and store for each chunk in the unroll 4078 // There is a load and store for each chunk in the unroll
4093 constexpr uint32_t BytesPerStorep = 16; 4079 constexpr uint32_t BytesPerStorep = 16;
4094 4080
4095 // Check if the operands are constants 4081 // Check if the operands are constants
4096 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); 4082 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4097 const bool IsCountConst = CountConst != nullptr; 4083 const bool IsCountConst = CountConst != nullptr;
4098 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; 4084 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
4099 4085
4100 if (shouldOptimizeMemIntrins() && IsCountConst && 4086 if (shouldOptimizeMemIntrins() && IsCountConst &&
4101 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) { 4087 CountValue <= BytesPerStorep * Traits::MEMMOVE_UNROLL_LIMIT) {
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
4151 } 4137 }
4152 4138
4153 // Fall back on a function call 4139 // Fall back on a function call
4154 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); 4140 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
4155 Call->addArg(Dest); 4141 Call->addArg(Dest);
4156 Call->addArg(Src); 4142 Call->addArg(Src);
4157 Call->addArg(Count); 4143 Call->addArg(Count);
4158 lowerCall(Call); 4144 lowerCall(Call);
4159 } 4145 }
4160 4146
4161 template <class Machine> 4147 template <typename TraitsType>
4162 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, 4148 void TargetX86Base<TraitsType>::lowerMemset(Operand *Dest, Operand *Val,
4163 Operand *Count) { 4149 Operand *Count) {
4164 constexpr uint32_t BytesPerStorep = 16; 4150 constexpr uint32_t BytesPerStorep = 16;
4165 constexpr uint32_t BytesPerStoreq = 8; 4151 constexpr uint32_t BytesPerStoreq = 8;
4166 constexpr uint32_t BytesPerStorei32 = 4; 4152 constexpr uint32_t BytesPerStorei32 = 4;
4167 assert(Val->getType() == IceType_i8); 4153 assert(Val->getType() == IceType_i8);
4168 4154
4169 // Check if the operands are constants 4155 // Check if the operands are constants
4170 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); 4156 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
4171 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); 4157 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
4172 const bool IsCountConst = CountConst != nullptr; 4158 const bool IsCountConst = CountConst != nullptr;
4173 const bool IsValConst = ValConst != nullptr; 4159 const bool IsValConst = ValConst != nullptr;
(...skipping 12 matching lines...) Expand all
4186 Variable *VecReg = nullptr; 4172 Variable *VecReg = nullptr;
4187 const uint32_t SpreadValue = 4173 const uint32_t SpreadValue =
4188 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; 4174 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
4189 4175
4190 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty, 4176 auto lowerSet = [this, &Base, SpreadValue, &VecReg](Type Ty,
4191 uint32_t OffsetAmt) { 4177 uint32_t OffsetAmt) {
4192 assert(Base != nullptr); 4178 assert(Base != nullptr);
4193 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr; 4179 Constant *Offset = OffsetAmt ? Ctx->getConstantInt32(OffsetAmt) : nullptr;
4194 4180
4195 // TODO(ascull): is 64-bit better with vector or scalar movq? 4181 // TODO(ascull): is 64-bit better with vector or scalar movq?
4196 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 4182 auto *Mem = X86OperandMem::create(Func, Ty, Base, Offset);
4197 if (isVectorType(Ty)) { 4183 if (isVectorType(Ty)) {
4198 assert(VecReg != nullptr); 4184 assert(VecReg != nullptr);
4199 _storep(VecReg, Mem); 4185 _storep(VecReg, Mem);
4200 } else if (Ty == IceType_f64) { 4186 } else if (Ty == IceType_f64) {
4201 assert(VecReg != nullptr); 4187 assert(VecReg != nullptr);
4202 _storeq(VecReg, Mem); 4188 _storeq(VecReg, Mem);
4203 } else { 4189 } else {
4204 assert(Ty != IceType_i64); 4190 assert(Ty != IceType_i64);
4205 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); 4191 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
4206 } 4192 }
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
4260 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); 4246 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
4261 ValExt = ValExtVar; 4247 ValExt = ValExtVar;
4262 } 4248 }
4263 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); 4249 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
4264 Call->addArg(Dest); 4250 Call->addArg(Dest);
4265 Call->addArg(ValExt); 4251 Call->addArg(ValExt);
4266 Call->addArg(Count); 4252 Call->addArg(Count);
4267 lowerCall(Call); 4253 lowerCall(Call);
4268 } 4254 }
4269 4255
4270 template <class Machine> 4256 template <typename TraitsType>
4271 void TargetX86Base<Machine>::lowerIndirectJump(Variable *JumpTarget) { 4257 void TargetX86Base<TraitsType>::lowerIndirectJump(Variable *JumpTarget) {
4272 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 4258 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
4273 if (Traits::Is64Bit) { 4259 if (Traits::Is64Bit) {
4274 Variable *T = makeReg(IceType_i64); 4260 Variable *T = makeReg(IceType_i64);
4275 _movzx(T, JumpTarget); 4261 _movzx(T, JumpTarget);
4276 JumpTarget = T; 4262 JumpTarget = T;
4277 } 4263 }
4278 if (NeedSandboxing) { 4264 if (NeedSandboxing) {
4279 _bundle_lock(); 4265 _bundle_lock();
4280 const SizeT BundleSize = 4266 const SizeT BundleSize =
4281 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 4267 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
(...skipping 376 matching lines...) Expand 10 before | Expand all | Expand 10 after
4658 /// For the purpose of mocking the bounds check, we'll do something like this: 4644 /// For the purpose of mocking the bounds check, we'll do something like this:
4659 /// 4645 ///
4660 /// cmp reg, 0 4646 /// cmp reg, 0
4661 /// je label 4647 /// je label
4662 /// cmp reg, 1 4648 /// cmp reg, 1
4663 /// je label 4649 /// je label
4664 /// label: 4650 /// label:
4665 /// 4651 ///
4666 /// Also note that we don't need to add a bounds check to a dereference of a 4652 /// Also note that we don't need to add a bounds check to a dereference of a
4667 /// simple global variable address. 4653 /// simple global variable address.
4668 template <class Machine> 4654 template <typename TraitsType>
4669 void TargetX86Base<Machine>::doMockBoundsCheck(Operand *Opnd) { 4655 void TargetX86Base<TraitsType>::doMockBoundsCheck(Operand *Opnd) {
4670 if (!Ctx->getFlags().getMockBoundsCheck()) 4656 if (!Ctx->getFlags().getMockBoundsCheck())
4671 return; 4657 return;
4672 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd)) { 4658 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd)) {
4673 if (Mem->getIndex()) { 4659 if (Mem->getIndex()) {
4674 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg"); 4660 llvm::report_fatal_error("doMockBoundsCheck: Opnd contains index reg");
4675 } 4661 }
4676 Opnd = Mem->getBase(); 4662 Opnd = Mem->getBase();
4677 } 4663 }
4678 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps 4664 // At this point Opnd could be nullptr, or Variable, or Constant, or perhaps
4679 // something else. We only care if it is Variable. 4665 // something else. We only care if it is Variable.
4680 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd); 4666 auto *Var = llvm::dyn_cast_or_null<Variable>(Opnd);
4681 if (Var == nullptr) 4667 if (Var == nullptr)
4682 return; 4668 return;
4683 // We use lowerStore() to copy out-args onto the stack. This creates a memory 4669 // We use lowerStore() to copy out-args onto the stack. This creates a memory
4684 // operand with the stack pointer as the base register. Don't do bounds 4670 // operand with the stack pointer as the base register. Don't do bounds
4685 // checks on that. 4671 // checks on that.
4686 if (Var->getRegNum() == static_cast<int32_t>(getStackReg())) 4672 if (Var->getRegNum() == static_cast<int32_t>(getStackReg()))
4687 return; 4673 return;
4688 4674
4689 auto *Label = Traits::Insts::Label::create(Func, this); 4675 auto *Label = InstX86Label::create(Func, this);
4690 _cmp(Opnd, Ctx->getConstantZero(IceType_i32)); 4676 _cmp(Opnd, Ctx->getConstantZero(IceType_i32));
4691 _br(Traits::Cond::Br_e, Label); 4677 _br(Traits::Cond::Br_e, Label);
4692 _cmp(Opnd, Ctx->getConstantInt32(1)); 4678 _cmp(Opnd, Ctx->getConstantInt32(1));
4693 _br(Traits::Cond::Br_e, Label); 4679 _br(Traits::Cond::Br_e, Label);
4694 Context.insert(Label); 4680 Context.insert(Label);
4695 } 4681 }
4696 4682
4697 template <class Machine> 4683 template <typename TraitsType>
4698 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4684 void TargetX86Base<TraitsType>::lowerLoad(const InstLoad *Load) {
4699 // A Load instruction can be treated the same as an Assign instruction, after 4685 // A Load instruction can be treated the same as an Assign instruction, after
4700 // the source operand is transformed into an Traits::X86OperandMem operand. 4686 // the source operand is transformed into an X86OperandMem operand.
4701 // Note that the address mode optimization already creates an 4687 // Note that the address mode optimization already creates an
4702 // Traits::X86OperandMem operand, so it doesn't need another level of 4688 // X86OperandMem operand, so it doesn't need another level of
4703 // transformation. 4689 // transformation.
4704 Variable *DestLoad = Load->getDest(); 4690 Variable *DestLoad = Load->getDest();
4705 Type Ty = DestLoad->getType(); 4691 Type Ty = DestLoad->getType();
4706 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4692 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
4707 doMockBoundsCheck(Src0); 4693 doMockBoundsCheck(Src0);
4708 auto *Assign = InstAssign::create(Func, DestLoad, Src0); 4694 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
4709 lowerAssign(Assign); 4695 lowerAssign(Assign);
4710 } 4696 }
4711 4697
4712 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { 4698 template <typename TraitsType>
4699 void TargetX86Base<TraitsType>::doAddressOptLoad() {
4713 Inst *Inst = Context.getCur(); 4700 Inst *Inst = Context.getCur();
4714 Variable *Dest = Inst->getDest(); 4701 Variable *Dest = Inst->getDest();
4715 Operand *Addr = Inst->getSrc(0); 4702 Operand *Addr = Inst->getSrc(0);
4716 Variable *Index = nullptr; 4703 Variable *Index = nullptr;
4717 ConstantRelocatable *Relocatable = nullptr; 4704 ConstantRelocatable *Relocatable = nullptr;
4718 uint16_t Shift = 0; 4705 uint16_t Shift = 0;
4719 int32_t Offset = 0; 4706 int32_t Offset = 0;
4720 // Vanilla ICE load instructions should not use the segment registers, and 4707 // Vanilla ICE load instructions should not use the segment registers, and
4721 // computeAddressOpt only works at the level of Variables and Constants, not 4708 // computeAddressOpt only works at the level of Variables and Constants, not
4722 // other Traits::X86OperandMem, so there should be no mention of segment 4709 // other X86OperandMem, so there should be no mention of segment
4723 // registers there either. 4710 // registers there either.
4724 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = 4711 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment;
4725 Traits::X86OperandMem::DefaultSegment;
4726 auto *Base = llvm::dyn_cast<Variable>(Addr); 4712 auto *Base = llvm::dyn_cast<Variable>(Addr);
4727 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { 4713 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
4728 Inst->setDeleted(); 4714 Inst->setDeleted();
4729 Constant *OffsetOp = nullptr; 4715 Constant *OffsetOp = nullptr;
4730 if (Relocatable == nullptr) { 4716 if (Relocatable == nullptr) {
4731 OffsetOp = Ctx->getConstantInt32(Offset); 4717 OffsetOp = Ctx->getConstantInt32(Offset);
4732 } else { 4718 } else {
4733 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, 4719 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
4734 Relocatable->getName(), 4720 Relocatable->getName(),
4735 Relocatable->getSuppressMangling()); 4721 Relocatable->getSuppressMangling());
4736 } 4722 }
4737 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, 4723 Addr = X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp, Index,
4738 Index, Shift, SegmentReg); 4724 Shift, SegmentReg);
4739 Context.insert<InstLoad>(Dest, Addr); 4725 Context.insert<InstLoad>(Dest, Addr);
4740 } 4726 }
4741 } 4727 }
4742 4728
4743 template <class Machine> 4729 template <typename TraitsType>
4744 void TargetX86Base<Machine>::randomlyInsertNop(float Probability, 4730 void TargetX86Base<TraitsType>::randomlyInsertNop(float Probability,
4745 RandomNumberGenerator &RNG) { 4731 RandomNumberGenerator &RNG) {
4746 RandomNumberGeneratorWrapper RNGW(RNG); 4732 RandomNumberGeneratorWrapper RNGW(RNG);
4747 if (RNGW.getTrueWithProbability(Probability)) { 4733 if (RNGW.getTrueWithProbability(Probability)) {
4748 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); 4734 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
4749 } 4735 }
4750 } 4736 }
4751 4737
4752 template <class Machine> 4738 template <typename TraitsType>
4753 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { 4739 void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
4754 Func->setError("Phi found in regular instruction list"); 4740 Func->setError("Phi found in regular instruction list");
4755 } 4741 }
4756 4742
4757 template <class Machine> 4743 template <typename TraitsType>
4758 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) { 4744 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
4759 Variable *Dest = Select->getDest(); 4745 Variable *Dest = Select->getDest();
4760 4746
4761 if (isVectorType(Dest->getType())) { 4747 if (isVectorType(Dest->getType())) {
4762 lowerSelectVector(Select); 4748 lowerSelectVector(Select);
4763 return; 4749 return;
4764 } 4750 }
4765 4751
4766 Operand *Condition = Select->getCondition(); 4752 Operand *Condition = Select->getCondition();
4767 // Handle folding opportunities. 4753 // Handle folding opportunities.
4768 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4754 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
(...skipping 11 matching lines...) Expand all
4780 return; 4766 return;
4781 } 4767 }
4782 } 4768 }
4783 } 4769 }
4784 4770
4785 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); 4771 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
4786 Operand *Zero = Ctx->getConstantZero(IceType_i32); 4772 Operand *Zero = Ctx->getConstantZero(IceType_i32);
4787 _cmp(CmpResult, Zero); 4773 _cmp(CmpResult, Zero);
4788 Operand *SrcT = Select->getTrueOperand(); 4774 Operand *SrcT = Select->getTrueOperand();
4789 Operand *SrcF = Select->getFalseOperand(); 4775 Operand *SrcF = Select->getFalseOperand();
4790 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; 4776 const BrCond Cond = Traits::Cond::Br_ne;
4791 lowerSelectMove(Dest, Cond, SrcT, SrcF); 4777 lowerSelectMove(Dest, Cond, SrcT, SrcF);
4792 } 4778 }
4793 4779
4794 template <class Machine> 4780 template <typename TraitsType>
4795 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, 4781 void TargetX86Base<TraitsType>::lowerSelectMove(Variable *Dest, BrCond Cond,
4796 typename Traits::Cond::BrCond Cond, 4782 Operand *SrcT, Operand *SrcF) {
4797 Operand *SrcT, Operand *SrcF) {
4798 Type DestTy = Dest->getType(); 4783 Type DestTy = Dest->getType();
4799 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4784 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4800 // The cmov instruction doesn't allow 8-bit or FP operands, so we need 4785 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
4801 // explicit control flow. 4786 // explicit control flow.
4802 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4787 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4803 auto *Label = Traits::Insts::Label::create(Func, this); 4788 auto *Label = InstX86Label::create(Func, this);
4804 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4789 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4805 _mov(Dest, SrcT); 4790 _mov(Dest, SrcT);
4806 _br(Cond, Label); 4791 _br(Cond, Label);
4807 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4792 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4808 _redefined(_mov(Dest, SrcF)); 4793 _redefined(_mov(Dest, SrcF));
4809 Context.insert(Label); 4794 Context.insert(Label);
4810 return; 4795 return;
4811 } 4796 }
4812 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4797 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4813 // But if SrcT is immediate, we might be able to do better, as the cmov 4798 // But if SrcT is immediate, we might be able to do better, as the cmov
4814 // instruction doesn't allow an immediate operand: 4799 // instruction doesn't allow an immediate operand:
4815 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4800 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4816 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4801 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4817 std::swap(SrcT, SrcF); 4802 std::swap(SrcT, SrcF);
4818 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4803 Cond = InstImpl<TraitsType>::InstX86Base::getOppositeCondition(Cond);
4819 } 4804 }
4820 if (!Traits::Is64Bit && DestTy == IceType_i64) { 4805 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4821 SrcT = legalizeUndef(SrcT); 4806 SrcT = legalizeUndef(SrcT);
4822 SrcF = legalizeUndef(SrcF); 4807 SrcF = legalizeUndef(SrcF);
4823 // Set the low portion. 4808 // Set the low portion.
4824 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4809 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4825 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); 4810 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
4826 // Set the high portion. 4811 // Set the high portion.
4827 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4812 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4828 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); 4813 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
4829 return; 4814 return;
4830 } 4815 }
4831 4816
4832 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || 4817 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4833 (Traits::Is64Bit && DestTy == IceType_i64)); 4818 (Traits::Is64Bit && DestTy == IceType_i64));
4834 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); 4819 lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
4835 } 4820 }
4836 4821
4837 template <class Machine> 4822 template <typename TraitsType>
4838 void TargetX86Base<Machine>::lowerSelectIntMove( 4823 void TargetX86Base<TraitsType>::lowerSelectIntMove(Variable *Dest, BrCond Cond,
4839 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT, 4824 Operand *SrcT,
4840 Operand *SrcF) { 4825 Operand *SrcF) {
4841 Variable *T = nullptr; 4826 Variable *T = nullptr;
4842 SrcF = legalize(SrcF); 4827 SrcF = legalize(SrcF);
4843 _mov(T, SrcF); 4828 _mov(T, SrcF);
4844 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4829 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4845 _cmov(T, SrcT, Cond); 4830 _cmov(T, SrcT, Cond);
4846 _mov(Dest, T); 4831 _mov(Dest, T);
4847 } 4832 }
4848 4833
4849 template <class Machine> 4834 template <typename TraitsType>
4850 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src, 4835 void TargetX86Base<TraitsType>::lowerMove(Variable *Dest, Operand *Src,
4851 bool IsRedefinition) { 4836 bool IsRedefinition) {
4852 assert(Dest->getType() == Src->getType()); 4837 assert(Dest->getType() == Src->getType());
4853 assert(!Dest->isRematerializable()); 4838 assert(!Dest->isRematerializable());
4854 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 4839 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
4855 Src = legalize(Src); 4840 Src = legalize(Src);
4856 Operand *SrcLo = loOperand(Src); 4841 Operand *SrcLo = loOperand(Src);
4857 Operand *SrcHi = hiOperand(Src); 4842 Operand *SrcHi = hiOperand(Src);
4858 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4843 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4859 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4844 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4860 Variable *T_Lo = nullptr, *T_Hi = nullptr; 4845 Variable *T_Lo = nullptr, *T_Hi = nullptr;
4861 _mov(T_Lo, SrcLo); 4846 _mov(T_Lo, SrcLo);
(...skipping 13 matching lines...) Expand all
4875 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); 4860 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
4876 } 4861 }
4877 if (isVectorType(Dest->getType())) { 4862 if (isVectorType(Dest->getType())) {
4878 _redefined(_movp(Dest, SrcLegal), IsRedefinition); 4863 _redefined(_movp(Dest, SrcLegal), IsRedefinition);
4879 } else { 4864 } else {
4880 _redefined(_mov(Dest, SrcLegal), IsRedefinition); 4865 _redefined(_mov(Dest, SrcLegal), IsRedefinition);
4881 } 4866 }
4882 } 4867 }
4883 } 4868 }
4884 4869
4885 template <class Machine> 4870 template <typename TraitsType>
4886 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, 4871 bool TargetX86Base<TraitsType>::lowerOptimizeFcmpSelect(
4887 const InstSelect *Select) { 4872 const InstFcmp *Fcmp, const InstSelect *Select) {
4888 Operand *CmpSrc0 = Fcmp->getSrc(0); 4873 Operand *CmpSrc0 = Fcmp->getSrc(0);
4889 Operand *CmpSrc1 = Fcmp->getSrc(1); 4874 Operand *CmpSrc1 = Fcmp->getSrc(1);
4890 Operand *SelectSrcT = Select->getTrueOperand(); 4875 Operand *SelectSrcT = Select->getTrueOperand();
4891 Operand *SelectSrcF = Select->getFalseOperand(); 4876 Operand *SelectSrcF = Select->getFalseOperand();
4892 4877
4893 if (CmpSrc0->getType() != SelectSrcT->getType()) 4878 if (CmpSrc0->getType() != SelectSrcT->getType())
4894 return false; 4879 return false;
4895 4880
4896 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. 4881 // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here.
4897 InstFcmp::FCond Condition = Fcmp->getCondition(); 4882 InstFcmp::FCond Condition = Fcmp->getCondition();
4898 switch (Condition) { 4883 switch (Condition) {
4899 default: 4884 default:
4900 return false; 4885 return false;
4901 case InstFcmp::True: 4886 case InstFcmp::True:
4902 case InstFcmp::False: 4887 case InstFcmp::False:
4903 case InstFcmp::Ogt: 4888 case InstFcmp::Ogt:
4904 case InstFcmp::Olt: 4889 case InstFcmp::Olt:
4905 (void)CmpSrc0; 4890 (void)CmpSrc0;
4906 (void)CmpSrc1; 4891 (void)CmpSrc1;
4907 (void)SelectSrcT; 4892 (void)SelectSrcT;
4908 (void)SelectSrcF; 4893 (void)SelectSrcF;
4909 break; 4894 break;
4910 } 4895 }
4911 return false; 4896 return false;
4912 } 4897 }
4913 4898
4914 template <class Machine> 4899 template <typename TraitsType>
4915 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) { 4900 void TargetX86Base<TraitsType>::lowerIcmp(const InstIcmp *Icmp) {
4916 Variable *Dest = Icmp->getDest(); 4901 Variable *Dest = Icmp->getDest();
4917 if (isVectorType(Dest->getType())) { 4902 if (isVectorType(Dest->getType())) {
4918 lowerIcmpVector(Icmp); 4903 lowerIcmpVector(Icmp);
4919 } else { 4904 } else {
4920 constexpr Inst *Consumer = nullptr; 4905 constexpr Inst *Consumer = nullptr;
4921 lowerIcmpAndConsumer(Icmp, Consumer); 4906 lowerIcmpAndConsumer(Icmp, Consumer);
4922 } 4907 }
4923 } 4908 }
4924 4909
4925 template <class Machine> 4910 template <typename TraitsType>
4926 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) { 4911 void TargetX86Base<TraitsType>::lowerSelectVector(const InstSelect *Inst) {
4927 Variable *Dest = Inst->getDest(); 4912 Variable *Dest = Inst->getDest();
4928 Type DestTy = Dest->getType(); 4913 Type DestTy = Dest->getType();
4929 Operand *SrcT = Inst->getTrueOperand(); 4914 Operand *SrcT = Inst->getTrueOperand();
4930 Operand *SrcF = Inst->getFalseOperand(); 4915 Operand *SrcF = Inst->getFalseOperand();
4931 Operand *Condition = Inst->getCondition(); 4916 Operand *Condition = Inst->getCondition();
4932 4917
4933 if (!isVectorType(DestTy)) 4918 if (!isVectorType(DestTy))
4934 llvm::report_fatal_error("Expected a vector select"); 4919 llvm::report_fatal_error("Expected a vector select");
4935 4920
4936 Type SrcTy = SrcT->getType(); 4921 Type SrcTy = SrcT->getType();
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
4983 } 4968 }
4984 _movp(T2, T); 4969 _movp(T2, T);
4985 _pand(T, SrcTRM); 4970 _pand(T, SrcTRM);
4986 _pandn(T2, SrcFRM); 4971 _pandn(T2, SrcFRM);
4987 _por(T, T2); 4972 _por(T, T2);
4988 _movp(Dest, T); 4973 _movp(Dest, T);
4989 4974
4990 return; 4975 return;
4991 } 4976 }
4992 4977
4993 template <class Machine> 4978 template <typename TraitsType>
4994 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4979 void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) {
4995 Operand *Value = Inst->getData(); 4980 Operand *Value = Inst->getData();
4996 Operand *Addr = Inst->getAddr(); 4981 Operand *Addr = Inst->getAddr();
4997 typename Traits::X86OperandMem *NewAddr = 4982 X86OperandMem *NewAddr = formMemoryOperand(Addr, Value->getType());
4998 formMemoryOperand(Addr, Value->getType());
4999 doMockBoundsCheck(NewAddr); 4983 doMockBoundsCheck(NewAddr);
5000 Type Ty = NewAddr->getType(); 4984 Type Ty = NewAddr->getType();
5001 4985
5002 if (!Traits::Is64Bit && Ty == IceType_i64) { 4986 if (!Traits::Is64Bit && Ty == IceType_i64) {
5003 Value = legalizeUndef(Value); 4987 Value = legalizeUndef(Value);
5004 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4988 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
5005 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4989 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
5006 _store(ValueHi, 4990 _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
5007 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); 4991 _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
5008 _store(ValueLo,
5009 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
5010 } else if (isVectorType(Ty)) { 4992 } else if (isVectorType(Ty)) {
5011 _storep(legalizeToReg(Value), NewAddr); 4993 _storep(legalizeToReg(Value), NewAddr);
5012 } else { 4994 } else {
5013 Value = legalize(Value, Legal_Reg | Legal_Imm); 4995 Value = legalize(Value, Legal_Reg | Legal_Imm);
5014 _store(Value, NewAddr); 4996 _store(Value, NewAddr);
5015 } 4997 }
5016 } 4998 }
5017 4999
5018 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { 5000 template <typename TraitsType>
5001 void TargetX86Base<TraitsType>::doAddressOptStore() {
5019 auto *Inst = llvm::cast<InstStore>(Context.getCur()); 5002 auto *Inst = llvm::cast<InstStore>(Context.getCur());
5020 Operand *Data = Inst->getData(); 5003 Operand *Data = Inst->getData();
5021 Operand *Addr = Inst->getAddr(); 5004 Operand *Addr = Inst->getAddr();
5022 Variable *Index = nullptr; 5005 Variable *Index = nullptr;
5023 ConstantRelocatable *Relocatable = nullptr; 5006 ConstantRelocatable *Relocatable = nullptr;
5024 uint16_t Shift = 0; 5007 uint16_t Shift = 0;
5025 int32_t Offset = 0; 5008 int32_t Offset = 0;
5026 auto *Base = llvm::dyn_cast<Variable>(Addr); 5009 auto *Base = llvm::dyn_cast<Variable>(Addr);
5027 // Vanilla ICE store instructions should not use the segment registers, and 5010 // Vanilla ICE store instructions should not use the segment registers, and
5028 // computeAddressOpt only works at the level of Variables and Constants, not 5011 // computeAddressOpt only works at the level of Variables and Constants, not
5029 // other Traits::X86OperandMem, so there should be no mention of segment 5012 // other X86OperandMem, so there should be no mention of segment
5030 // registers there either. 5013 // registers there either.
5031 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg = 5014 const SegmentRegisters SegmentReg = X86OperandMem::DefaultSegment;
5032 Traits::X86OperandMem::DefaultSegment;
5033 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) { 5015 if (computeAddressOpt(Func, Inst, Relocatable, Offset, Base, Index, Shift)) {
5034 Inst->setDeleted(); 5016 Inst->setDeleted();
5035 Constant *OffsetOp = nullptr; 5017 Constant *OffsetOp = nullptr;
5036 if (Relocatable == nullptr) { 5018 if (Relocatable == nullptr) {
5037 OffsetOp = Ctx->getConstantInt32(Offset); 5019 OffsetOp = Ctx->getConstantInt32(Offset);
5038 } else { 5020 } else {
5039 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset, 5021 OffsetOp = Ctx->getConstantSym(Relocatable->getOffset() + Offset,
5040 Relocatable->getName(), 5022 Relocatable->getName(),
5041 Relocatable->getSuppressMangling()); 5023 Relocatable->getSuppressMangling());
5042 } 5024 }
5043 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, 5025 Addr = X86OperandMem::create(Func, Data->getType(), Base, OffsetOp, Index,
5044 Index, Shift, SegmentReg); 5026 Shift, SegmentReg);
5045 auto *NewStore = Context.insert<InstStore>(Data, Addr); 5027 auto *NewStore = Context.insert<InstStore>(Data, Addr);
5046 if (Inst->getDest()) 5028 if (Inst->getDest())
5047 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 5029 NewStore->setRmwBeacon(Inst->getRmwBeacon());
5048 } 5030 }
5049 } 5031 }
5050 5032
5051 template <class Machine> 5033 template <typename TraitsType>
5052 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, 5034 Operand *TargetX86Base<TraitsType>::lowerCmpRange(Operand *Comparison,
5053 uint64_t Min, uint64_t Max) { 5035 uint64_t Min, uint64_t Max) {
5054 // TODO(ascull): 64-bit should not reach here but only because it is not 5036 // TODO(ascull): 64-bit should not reach here but only because it is not
5055 // implemented yet. This should be able to handle the 64-bit case. 5037 // implemented yet. This should be able to handle the 64-bit case.
5056 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); 5038 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
5057 // Subtracting 0 is a nop so don't do it 5039 // Subtracting 0 is a nop so don't do it
5058 if (Min != 0) { 5040 if (Min != 0) {
5059 // Avoid clobbering the comparison by copying it 5041 // Avoid clobbering the comparison by copying it
5060 Variable *T = nullptr; 5042 Variable *T = nullptr;
5061 _mov(T, Comparison); 5043 _mov(T, Comparison);
5062 _sub(T, Ctx->getConstantInt32(Min)); 5044 _sub(T, Ctx->getConstantInt32(Min));
5063 Comparison = T; 5045 Comparison = T;
5064 } 5046 }
5065 5047
5066 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); 5048 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
5067 5049
5068 return Comparison; 5050 return Comparison;
5069 } 5051 }
5070 5052
5071 template <class Machine> 5053 template <typename TraitsType>
5072 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, 5054 void TargetX86Base<TraitsType>::lowerCaseCluster(const CaseCluster &Case,
5073 Operand *Comparison, bool DoneCmp, 5055 Operand *Comparison,
5074 CfgNode *DefaultTarget) { 5056 bool DoneCmp,
5057 CfgNode *DefaultTarget) {
5075 switch (Case.getKind()) { 5058 switch (Case.getKind()) {
5076 case CaseCluster::JumpTable: { 5059 case CaseCluster::JumpTable: {
5077 typename Traits::Insts::Label *SkipJumpTable; 5060 InstX86Label *SkipJumpTable;
5078 5061
5079 Operand *RangeIndex = 5062 Operand *RangeIndex =
5080 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); 5063 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
5081 if (DefaultTarget == nullptr) { 5064 if (DefaultTarget == nullptr) {
5082 // Skip over jump table logic if comparison not in range and no default 5065 // Skip over jump table logic if comparison not in range and no default
5083 SkipJumpTable = Traits::Insts::Label::create(Func, this); 5066 SkipJumpTable = InstX86Label::create(Func, this);
5084 _br(Traits::Cond::Br_a, SkipJumpTable); 5067 _br(Traits::Cond::Br_a, SkipJumpTable);
5085 } else { 5068 } else {
5086 _br(Traits::Cond::Br_a, DefaultTarget); 5069 _br(Traits::Cond::Br_a, DefaultTarget);
5087 } 5070 }
5088 5071
5089 InstJumpTable *JumpTable = Case.getJumpTable(); 5072 InstJumpTable *JumpTable = Case.getJumpTable();
5090 Context.insert(JumpTable); 5073 Context.insert(JumpTable);
5091 5074
5092 // Make sure the index is a register of the same width as the base 5075 // Make sure the index is a register of the same width as the base
5093 Variable *Index; 5076 Variable *Index;
5094 if (RangeIndex->getType() != getPointerType()) { 5077 if (RangeIndex->getType() != getPointerType()) {
5095 Index = makeReg(getPointerType()); 5078 Index = makeReg(getPointerType());
5096 _movzx(Index, RangeIndex); 5079 _movzx(Index, RangeIndex);
5097 } else { 5080 } else {
5098 Index = legalizeToReg(RangeIndex); 5081 Index = legalizeToReg(RangeIndex);
5099 } 5082 }
5100 5083
5101 constexpr RelocOffsetT RelocOffset = 0; 5084 constexpr RelocOffsetT RelocOffset = 0;
5102 constexpr bool SuppressMangling = true; 5085 constexpr bool SuppressMangling = true;
5103 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); 5086 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
5104 Constant *Base = Ctx->getConstantSym( 5087 Constant *Base = Ctx->getConstantSym(
5105 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), 5088 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()),
5106 SuppressMangling); 5089 SuppressMangling);
5107 Constant *Offset = nullptr; 5090 Constant *Offset = nullptr;
5108 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); 5091 uint16_t Shift = typeWidthInBytesLog2(getPointerType());
5109 // TODO(ascull): remove need for legalize by allowing null base in memop 5092 // TODO(ascull): remove need for legalize by allowing null base in memop
5110 auto *TargetInMemory = Traits::X86OperandMem::create( 5093 auto *TargetInMemory = X86OperandMem::create(
5111 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); 5094 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift);
5112 Variable *Target = nullptr; 5095 Variable *Target = nullptr;
5113 _mov(Target, TargetInMemory); 5096 _mov(Target, TargetInMemory);
5114 lowerIndirectJump(Target); 5097 lowerIndirectJump(Target);
5115 5098
5116 if (DefaultTarget == nullptr) 5099 if (DefaultTarget == nullptr)
5117 Context.insert(SkipJumpTable); 5100 Context.insert(SkipJumpTable);
5118 return; 5101 return;
5119 } 5102 }
5120 case CaseCluster::Range: { 5103 case CaseCluster::Range: {
(...skipping 15 matching lines...) Expand all
5136 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); 5119 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh());
5137 _br(Traits::Cond::Br_be, Case.getTarget()); 5120 _br(Traits::Cond::Br_be, Case.getTarget());
5138 } 5121 }
5139 if (DefaultTarget != nullptr) 5122 if (DefaultTarget != nullptr)
5140 _br(DefaultTarget); 5123 _br(DefaultTarget);
5141 return; 5124 return;
5142 } 5125 }
5143 } 5126 }
5144 } 5127 }
5145 5128
5146 template <class Machine> 5129 template <typename TraitsType>
5147 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 5130 void TargetX86Base<TraitsType>::lowerSwitch(const InstSwitch *Inst) {
5148 // Group cases together and navigate through them with a binary search 5131 // Group cases together and navigate through them with a binary search
5149 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); 5132 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
5150 Operand *Src0 = Inst->getComparison(); 5133 Operand *Src0 = Inst->getComparison();
5151 CfgNode *DefaultTarget = Inst->getLabelDefault(); 5134 CfgNode *DefaultTarget = Inst->getLabelDefault();
5152 5135
5153 assert(CaseClusters.size() != 0); // Should always be at least one 5136 assert(CaseClusters.size() != 0); // Should always be at least one
5154 5137
5155 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { 5138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
5156 Src0 = legalize(Src0); // get Base/Index into physical registers 5139 Src0 = legalize(Src0); // get Base/Index into physical registers
5157 Operand *Src0Lo = loOperand(Src0); 5140 Operand *Src0Lo = loOperand(Src0);
5158 Operand *Src0Hi = hiOperand(Src0); 5141 Operand *Src0Hi = hiOperand(Src0);
5159 if (CaseClusters.back().getHigh() > UINT32_MAX) { 5142 if (CaseClusters.back().getHigh() > UINT32_MAX) {
5160 // TODO(ascull): handle 64-bit case properly (currently naive version) 5143 // TODO(ascull): handle 64-bit case properly (currently naive version)
5161 // This might be handled by a higher level lowering of switches. 5144 // This might be handled by a higher level lowering of switches.
5162 SizeT NumCases = Inst->getNumCases(); 5145 SizeT NumCases = Inst->getNumCases();
5163 if (NumCases >= 2) { 5146 if (NumCases >= 2) {
5164 Src0Lo = legalizeToReg(Src0Lo); 5147 Src0Lo = legalizeToReg(Src0Lo);
5165 Src0Hi = legalizeToReg(Src0Hi); 5148 Src0Hi = legalizeToReg(Src0Hi);
5166 } else { 5149 } else {
5167 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 5150 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
5168 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 5151 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
5169 } 5152 }
5170 for (SizeT I = 0; I < NumCases; ++I) { 5153 for (SizeT I = 0; I < NumCases; ++I) {
5171 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); 5154 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
5172 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); 5155 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
5173 typename Traits::Insts::Label *Label = 5156 InstX86Label *Label = InstX86Label::create(Func, this);
5174 Traits::Insts::Label::create(Func, this);
5175 _cmp(Src0Lo, ValueLo); 5157 _cmp(Src0Lo, ValueLo);
5176 _br(Traits::Cond::Br_ne, Label); 5158 _br(Traits::Cond::Br_ne, Label);
5177 _cmp(Src0Hi, ValueHi); 5159 _cmp(Src0Hi, ValueHi);
5178 _br(Traits::Cond::Br_e, Inst->getLabel(I)); 5160 _br(Traits::Cond::Br_e, Inst->getLabel(I));
5179 Context.insert(Label); 5161 Context.insert(Label);
5180 } 5162 }
5181 _br(Inst->getLabelDefault()); 5163 _br(Inst->getLabelDefault());
5182 return; 5164 return;
5183 } else { 5165 } else {
5184 // All the values are 32-bit so just check the operand is too and then 5166 // All the values are 32-bit so just check the operand is too and then
(...skipping 14 matching lines...) Expand all
5199 constexpr bool DoneCmp = false; 5181 constexpr bool DoneCmp = false;
5200 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); 5182 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget);
5201 return; 5183 return;
5202 } 5184 }
5203 5185
5204 // Going to be using multiple times so get it in a register early 5186 // Going to be using multiple times so get it in a register early
5205 Variable *Comparison = legalizeToReg(Src0); 5187 Variable *Comparison = legalizeToReg(Src0);
5206 5188
5207 // A span is over the clusters 5189 // A span is over the clusters
5208 struct SearchSpan { 5190 struct SearchSpan {
5209 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) 5191 SearchSpan(SizeT Begin, SizeT Size, InstX86Label *Label)
5210 : Begin(Begin), Size(Size), Label(Label) {} 5192 : Begin(Begin), Size(Size), Label(Label) {}
5211 5193
5212 SizeT Begin; 5194 SizeT Begin;
5213 SizeT Size; 5195 SizeT Size;
5214 typename Traits::Insts::Label *Label; 5196 InstX86Label *Label;
5215 }; 5197 };
5216 // The stack will only grow to the height of the tree so 12 should be plenty 5198 // The stack will only grow to the height of the tree so 12 should be plenty
5217 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack; 5199 std::stack<SearchSpan, llvm::SmallVector<SearchSpan, 12>> SearchSpanStack;
5218 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr); 5200 SearchSpanStack.emplace(0, CaseClusters.size(), nullptr);
5219 bool DoneCmp = false; 5201 bool DoneCmp = false;
5220 5202
5221 while (!SearchSpanStack.empty()) { 5203 while (!SearchSpanStack.empty()) {
5222 SearchSpan Span = SearchSpanStack.top(); 5204 SearchSpan Span = SearchSpanStack.top();
5223 SearchSpanStack.pop(); 5205 SearchSpanStack.pop();
5224 5206
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
5258 DoneCmp = false; 5240 DoneCmp = false;
5259 lowerCaseCluster(*CaseB, Comparison, DoneCmp, 5241 lowerCaseCluster(*CaseB, Comparison, DoneCmp,
5260 SearchSpanStack.empty() ? nullptr : DefaultTarget); 5242 SearchSpanStack.empty() ? nullptr : DefaultTarget);
5261 } break; 5243 } break;
5262 5244
5263 default: 5245 default:
5264 // Pick the middle item and branch b or ae 5246 // Pick the middle item and branch b or ae
5265 SizeT PivotIndex = Span.Begin + (Span.Size / 2); 5247 SizeT PivotIndex = Span.Begin + (Span.Size / 2);
5266 const CaseCluster &Pivot = CaseClusters[PivotIndex]; 5248 const CaseCluster &Pivot = CaseClusters[PivotIndex];
5267 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); 5249 Constant *Value = Ctx->getConstantInt32(Pivot.getLow());
5268 typename Traits::Insts::Label *Label = 5250 InstX86Label *Label = InstX86Label::create(Func, this);
5269 Traits::Insts::Label::create(Func, this);
5270 _cmp(Comparison, Value); 5251 _cmp(Comparison, Value);
5271 // TODO(ascull): does it alway have to be far? 5252 // TODO(ascull): does it alway have to be far?
5272 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); 5253 _br(Traits::Cond::Br_b, Label, InstX86Br::Far);
5273 // Lower the left and (pivot+right) sides, falling through to the right 5254 // Lower the left and (pivot+right) sides, falling through to the right
5274 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); 5255 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label);
5275 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); 5256 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr);
5276 DoneCmp = true; 5257 DoneCmp = true;
5277 break; 5258 break;
5278 } 5259 }
5279 } 5260 }
5280 5261
5281 _br(DefaultTarget); 5262 _br(DefaultTarget);
5282 } 5263 }
5283 5264
5284 template <class Machine> 5265 template <typename TraitsType>
5285 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, 5266 void TargetX86Base<TraitsType>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
5286 Variable *Dest, Operand *Src0, 5267 Variable *Dest,
5287 Operand *Src1) { 5268 Operand *Src0,
5269 Operand *Src1) {
5288 assert(isVectorType(Dest->getType())); 5270 assert(isVectorType(Dest->getType()));
5289 Type Ty = Dest->getType(); 5271 Type Ty = Dest->getType();
5290 Type ElementTy = typeElementType(Ty); 5272 Type ElementTy = typeElementType(Ty);
5291 SizeT NumElements = typeNumElements(Ty); 5273 SizeT NumElements = typeNumElements(Ty);
5292 5274
5293 Operand *T = Ctx->getConstantUndef(Ty); 5275 Operand *T = Ctx->getConstantUndef(Ty);
5294 for (SizeT I = 0; I < NumElements; ++I) { 5276 for (SizeT I = 0; I < NumElements; ++I) {
5295 Constant *Index = Ctx->getConstantInt32(I); 5277 Constant *Index = Ctx->getConstantInt32(I);
5296 5278
5297 // Extract the next two inputs. 5279 // Extract the next two inputs.
(...skipping 18 matching lines...) Expand all
5316 } 5298 }
5317 5299
5318 /// The following pattern occurs often in lowered C and C++ code: 5300 /// The following pattern occurs often in lowered C and C++ code:
5319 /// 5301 ///
5320 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 5302 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
5321 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> 5303 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
5322 /// 5304 ///
5323 /// We can eliminate the sext operation by copying the result of pcmpeqd, 5305 /// We can eliminate the sext operation by copying the result of pcmpeqd,
5324 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the 5306 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
5325 /// sext operation. 5307 /// sext operation.
5326 template <class Machine> 5308 template <typename TraitsType>
5327 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( 5309 void TargetX86Base<TraitsType>::eliminateNextVectorSextInstruction(
5328 Variable *SignExtendedResult) { 5310 Variable *SignExtendedResult) {
5329 if (auto *NextCast = 5311 if (auto *NextCast =
5330 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 5312 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
5331 if (NextCast->getCastKind() == InstCast::Sext && 5313 if (NextCast->getCastKind() == InstCast::Sext &&
5332 NextCast->getSrc(0) == SignExtendedResult) { 5314 NextCast->getSrc(0) == SignExtendedResult) {
5333 NextCast->setDeleted(); 5315 NextCast->setDeleted();
5334 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); 5316 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
5335 // Skip over the instruction. 5317 // Skip over the instruction.
5336 Context.advanceNext(); 5318 Context.advanceNext();
5337 } 5319 }
5338 } 5320 }
5339 } 5321 }
5340 5322
5341 template <class Machine> 5323 template <typename TraitsType>
5342 void TargetX86Base<Machine>::lowerUnreachable( 5324 void TargetX86Base<TraitsType>::lowerUnreachable(
5343 const InstUnreachable * /*Inst*/) { 5325 const InstUnreachable * /*Inst*/) {
5344 _ud2(); 5326 _ud2();
5345 // Add a fake use of esp to make sure esp adjustments after the unreachable 5327 // Add a fake use of esp to make sure esp adjustments after the unreachable
5346 // do not get dead-code eliminated. 5328 // do not get dead-code eliminated.
5347 keepEspLiveAtExit(); 5329 keepEspLiveAtExit();
5348 } 5330 }
5349 5331
5350 template <class Machine> 5332 template <typename TraitsType>
5351 void TargetX86Base<Machine>::lowerRMW( 5333 void TargetX86Base<TraitsType>::lowerRMW(const InstX86FakeRMW *RMW) {
5352 const typename Traits::Insts::FakeRMW *RMW) {
5353 // If the beacon variable's live range does not end in this instruction, then 5334 // If the beacon variable's live range does not end in this instruction, then
5354 // it must end in the modified Store instruction that follows. This means 5335 // it must end in the modified Store instruction that follows. This means
5355 // that the original Store instruction is still there, either because the 5336 // that the original Store instruction is still there, either because the
5356 // value being stored is used beyond the Store instruction, or because dead 5337 // value being stored is used beyond the Store instruction, or because dead
5357 // code elimination did not happen. In either case, we cancel RMW lowering 5338 // code elimination did not happen. In either case, we cancel RMW lowering
5358 // (and the caller deletes the RMW instruction). 5339 // (and the caller deletes the RMW instruction).
5359 if (!RMW->isLastUse(RMW->getBeacon())) 5340 if (!RMW->isLastUse(RMW->getBeacon()))
5360 return; 5341 return;
5361 Operand *Src = RMW->getData(); 5342 Operand *Src = RMW->getData();
5362 Type Ty = Src->getType(); 5343 Type Ty = Src->getType();
5363 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 5344 X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
5364 doMockBoundsCheck(Addr); 5345 doMockBoundsCheck(Addr);
5365 if (!Traits::Is64Bit && Ty == IceType_i64) { 5346 if (!Traits::Is64Bit && Ty == IceType_i64) {
5366 Src = legalizeUndef(Src); 5347 Src = legalizeUndef(Src);
5367 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 5348 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
5368 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 5349 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
5369 typename Traits::X86OperandMem *AddrLo = 5350 X86OperandMem *AddrLo = llvm::cast<X86OperandMem>(loOperand(Addr));
5370 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); 5351 X86OperandMem *AddrHi = llvm::cast<X86OperandMem>(hiOperand(Addr));
5371 typename Traits::X86OperandMem *AddrHi =
5372 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
5373 switch (RMW->getOp()) { 5352 switch (RMW->getOp()) {
5374 default: 5353 default:
5375 // TODO(stichnot): Implement other arithmetic operators. 5354 // TODO(stichnot): Implement other arithmetic operators.
5376 break; 5355 break;
5377 case InstArithmetic::Add: 5356 case InstArithmetic::Add:
5378 _add_rmw(AddrLo, SrcLo); 5357 _add_rmw(AddrLo, SrcLo);
5379 _adc_rmw(AddrHi, SrcHi); 5358 _adc_rmw(AddrHi, SrcHi);
5380 return; 5359 return;
5381 case InstArithmetic::Sub: 5360 case InstArithmetic::Sub:
5382 _sub_rmw(AddrLo, SrcLo); 5361 _sub_rmw(AddrLo, SrcLo);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
5420 return; 5399 return;
5421 case InstArithmetic::Xor: 5400 case InstArithmetic::Xor:
5422 Src = legalize(Src, Legal_Reg | Legal_Imm); 5401 Src = legalize(Src, Legal_Reg | Legal_Imm);
5423 _xor_rmw(Addr, Src); 5402 _xor_rmw(Addr, Src);
5424 return; 5403 return;
5425 } 5404 }
5426 } 5405 }
5427 llvm::report_fatal_error("Couldn't lower RMW instruction"); 5406 llvm::report_fatal_error("Couldn't lower RMW instruction");
5428 } 5407 }
5429 5408
5430 template <class Machine> 5409 template <typename TraitsType>
5431 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 5410 void TargetX86Base<TraitsType>::lowerOther(const Inst *Instr) {
5432 if (const auto *RMW = 5411 if (const auto *RMW = llvm::dyn_cast<InstX86FakeRMW>(Instr)) {
5433 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
5434 lowerRMW(RMW); 5412 lowerRMW(RMW);
5435 } else { 5413 } else {
5436 TargetLowering::lowerOther(Instr); 5414 TargetLowering::lowerOther(Instr);
5437 } 5415 }
5438 } 5416 }
5439 5417
5440 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve 5418 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5441 /// integrity of liveness analysis. Undef values are also turned into zeroes, 5419 /// integrity of liveness analysis. Undef values are also turned into zeroes,
5442 /// since loOperand() and hiOperand() don't expect Undef input. 5420 /// since loOperand() and hiOperand() don't expect Undef input.
5443 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 5421 template <typename TraitsType> void TargetX86Base<TraitsType>::prelowerPhis() {
5444 if (Traits::Is64Bit) { 5422 if (Traits::Is64Bit) {
5445 // On x86-64 we don't need to prelower phis -- the architecture can handle 5423 // On x86-64 we don't need to prelower phis -- the architecture can handle
5446 // 64-bit integer natively. 5424 // 64-bit integer natively.
5447 return; 5425 return;
5448 } 5426 }
5449 5427
5450 // Pause constant blinding or pooling, blinding or pooling will be done later 5428 // Pause constant blinding or pooling, blinding or pooling will be done later
5451 // during phi lowering assignments 5429 // during phi lowering assignments
5452 BoolFlagSaver B(RandomizationPoolingPaused, true); 5430 BoolFlagSaver B(RandomizationPoolingPaused, true);
5453 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 5431 PhiLowering::prelowerPhis32Bit<TargetX86Base<TraitsType>>(
5454 this, Context.getNode(), Func); 5432 this, Context.getNode(), Func);
5455 } 5433 }
5456 5434
5457 template <class Machine> 5435 template <typename TraitsType>
5458 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) { 5436 void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
5459 uint32_t StackArgumentsSize = 0; 5437 uint32_t StackArgumentsSize = 0;
5460 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { 5438 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5461 const char *HelperName = nullptr; 5439 const char *HelperName = nullptr;
5462 Variable *Dest = Arith->getDest(); 5440 Variable *Dest = Arith->getDest();
5463 Type DestTy = Dest->getType(); 5441 Type DestTy = Dest->getType();
5464 if (!Traits::Is64Bit && DestTy == IceType_i64) { 5442 if (!Traits::Is64Bit && DestTy == IceType_i64) {
5465 switch (Arith->getOp()) { 5443 switch (Arith->getOp()) {
5466 default: 5444 default:
5467 return; 5445 return;
5468 case InstArithmetic::Udiv: 5446 case InstArithmetic::Udiv:
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
5673 if (!isScalarFloatingType(ReturnType)) 5651 if (!isScalarFloatingType(ReturnType))
5674 return; 5652 return;
5675 StackArgumentsSize = typeWidthInBytes(ReturnType); 5653 StackArgumentsSize = typeWidthInBytes(ReturnType);
5676 } else { 5654 } else {
5677 return; 5655 return;
5678 } 5656 }
5679 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize); 5657 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
5680 updateMaxOutArgsSizeBytes(StackArgumentsSize); 5658 updateMaxOutArgsSizeBytes(StackArgumentsSize);
5681 } 5659 }
5682 5660
5683 template <class Machine> 5661 template <typename TraitsType>
5684 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes( 5662 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
5685 const std::vector<Type> &ArgTypes, Type ReturnType) { 5663 const std::vector<Type> &ArgTypes, Type ReturnType) {
5686 uint32_t OutArgumentsSizeBytes = 0; 5664 uint32_t OutArgumentsSizeBytes = 0;
5687 uint32_t XmmArgCount = 0; 5665 uint32_t XmmArgCount = 0;
5688 uint32_t GprArgCount = 0; 5666 uint32_t GprArgCount = 0;
5689 for (Type Ty : ArgTypes) { 5667 for (Type Ty : ArgTypes) {
5690 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 5668 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
5691 assert(typeWidthInBytes(Ty) >= 4); 5669 assert(typeWidthInBytes(Ty) >= 4);
5692 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { 5670 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
5693 ++XmmArgCount; 5671 ++XmmArgCount;
5694 } else if (isScalarIntegerType(Ty) && 5672 } else if (isScalarIntegerType(Ty) &&
(...skipping 13 matching lines...) Expand all
5708 // The 32 bit ABI requires floating point values to be returned on the x87 FP 5686 // The 32 bit ABI requires floating point values to be returned on the x87 FP
5709 // stack. Ensure there is enough space for the fstp/movs for floating returns. 5687 // stack. Ensure there is enough space for the fstp/movs for floating returns.
5710 if (isScalarFloatingType(ReturnType)) { 5688 if (isScalarFloatingType(ReturnType)) {
5711 OutArgumentsSizeBytes = 5689 OutArgumentsSizeBytes =
5712 std::max(OutArgumentsSizeBytes, 5690 std::max(OutArgumentsSizeBytes,
5713 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType))); 5691 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
5714 } 5692 }
5715 return OutArgumentsSizeBytes; 5693 return OutArgumentsSizeBytes;
5716 } 5694 }
5717 5695
5718 template <class Machine> 5696 template <typename TraitsType>
5719 uint32_t 5697 uint32_t TargetX86Base<TraitsType>::getCallStackArgumentsSizeBytes(
5720 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { 5698 const InstCall *Instr) {
5721 // Build a vector of the arguments' types. 5699 // Build a vector of the arguments' types.
5722 std::vector<Type> ArgTypes; 5700 std::vector<Type> ArgTypes;
5723 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { 5701 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
5724 Operand *Arg = Instr->getArg(i); 5702 Operand *Arg = Instr->getArg(i);
5725 ArgTypes.emplace_back(Arg->getType()); 5703 ArgTypes.emplace_back(Arg->getType());
5726 } 5704 }
5727 // Compute the return type (if any); 5705 // Compute the return type (if any);
5728 Type ReturnType = IceType_void; 5706 Type ReturnType = IceType_void;
5729 Variable *Dest = Instr->getDest(); 5707 Variable *Dest = Instr->getDest();
5730 if (Dest != nullptr) 5708 if (Dest != nullptr)
5731 ReturnType = Dest->getType(); 5709 ReturnType = Dest->getType();
5732 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType); 5710 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
5733 } 5711 }
5734 5712
5735 template <class Machine> 5713 template <typename TraitsType>
5736 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { 5714 Variable *TargetX86Base<TraitsType>::makeZeroedRegister(Type Ty,
5715 int32_t RegNum) {
5737 Variable *Reg = makeReg(Ty, RegNum); 5716 Variable *Reg = makeReg(Ty, RegNum);
5738 switch (Ty) { 5717 switch (Ty) {
5739 case IceType_i1: 5718 case IceType_i1:
5740 case IceType_i8: 5719 case IceType_i8:
5741 case IceType_i16: 5720 case IceType_i16:
5742 case IceType_i32: 5721 case IceType_i32:
5743 case IceType_i64: 5722 case IceType_i64:
5744 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 5723 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
5745 _mov(Reg, Ctx->getConstantZero(Ty)); 5724 _mov(Reg, Ctx->getConstantZero(Ty));
5746 break; 5725 break;
(...skipping 12 matching lines...) Expand all
5759 return Reg; 5738 return Reg;
5760 } 5739 }
5761 5740
5762 // There is no support for loading or emitting vector constants, so the vector 5741 // There is no support for loading or emitting vector constants, so the vector
5763 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are 5742 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
5764 // initialized with register operations. 5743 // initialized with register operations.
5765 // 5744 //
5766 // TODO(wala): Add limited support for vector constants so that complex 5745 // TODO(wala): Add limited support for vector constants so that complex
5767 // initialization in registers is unnecessary. 5746 // initialization in registers is unnecessary.
5768 5747
5769 template <class Machine> 5748 template <typename TraitsType>
5770 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { 5749 Variable *TargetX86Base<TraitsType>::makeVectorOfZeros(Type Ty,
5750 int32_t RegNum) {
5771 return makeZeroedRegister(Ty, RegNum); 5751 return makeZeroedRegister(Ty, RegNum);
5772 } 5752 }
5773 5753
5774 template <class Machine> 5754 template <typename TraitsType>
5775 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, 5755 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,
5776 int32_t RegNum) { 5756 int32_t RegNum) {
5777 Variable *MinusOnes = makeReg(Ty, RegNum); 5757 Variable *MinusOnes = makeReg(Ty, RegNum);
5778 // Insert a FakeDef so the live range of MinusOnes is not overestimated. 5758 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
5779 Context.insert<InstFakeDef>(MinusOnes); 5759 Context.insert<InstFakeDef>(MinusOnes);
5780 _pcmpeq(MinusOnes, MinusOnes); 5760 _pcmpeq(MinusOnes, MinusOnes);
5781 return MinusOnes; 5761 return MinusOnes;
5782 } 5762 }
5783 5763
5784 template <class Machine> 5764 template <typename TraitsType>
5785 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { 5765 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
5786 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 5766 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
5787 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 5767 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
5788 _psub(Dest, MinusOne); 5768 _psub(Dest, MinusOne);
5789 return Dest; 5769 return Dest;
5790 } 5770 }
5791 5771
5792 template <class Machine> 5772 template <typename TraitsType>
5793 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, 5773 Variable *TargetX86Base<TraitsType>::makeVectorOfHighOrderBits(Type Ty,
5794 int32_t RegNum) { 5774 int32_t RegNum) {
5795 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 5775 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5796 Ty == IceType_v16i8); 5776 Ty == IceType_v16i8);
5797 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 5777 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5798 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 5778 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5799 SizeT Shift = 5779 SizeT Shift =
5800 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 5780 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5801 _psll(Reg, Ctx->getConstantInt8(Shift)); 5781 _psll(Reg, Ctx->getConstantInt8(Shift));
5802 return Reg; 5782 return Reg;
5803 } else { 5783 } else {
5804 // SSE has no left shift operation for vectors of 8 bit integers. 5784 // SSE has no left shift operation for vectors of 8 bit integers.
5805 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 5785 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5806 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 5786 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5807 Variable *Reg = makeReg(Ty, RegNum); 5787 Variable *Reg = makeReg(Ty, RegNum);
5808 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 5788 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5809 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 5789 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5810 return Reg; 5790 return Reg;
5811 } 5791 }
5812 } 5792 }
5813 5793
5814 /// Construct a mask in a register that can be and'ed with a floating-point 5794 /// Construct a mask in a register that can be and'ed with a floating-point
5815 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 5795 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
5816 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of 5796 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
5817 /// ones logically right shifted one bit. 5797 /// ones logically right shifted one bit.
5818 // TODO(stichnot): Fix the wala 5798 // TODO(stichnot): Fix the wala
5819 // TODO: above, to represent vector constants in memory. 5799 // TODO: above, to represent vector constants in memory.
5820 template <class Machine> 5800 template <typename TraitsType>
5821 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 5801 Variable *TargetX86Base<TraitsType>::makeVectorOfFabsMask(Type Ty,
5822 int32_t RegNum) { 5802 int32_t RegNum) {
5823 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 5803 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
5824 _psrl(Reg, Ctx->getConstantInt8(1)); 5804 _psrl(Reg, Ctx->getConstantInt8(1));
5825 return Reg; 5805 return Reg;
5826 } 5806 }
5827 5807
5828 template <class Machine> 5808 template <typename TraitsType>
5829 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5809 typename TargetX86Base<TraitsType>::X86OperandMem *
5830 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 5810 TargetX86Base<TraitsType>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
5831 uint32_t Offset) { 5811 uint32_t Offset) {
5832 // Ensure that Loc is a stack slot. 5812 // Ensure that Loc is a stack slot.
5833 assert(Slot->mustNotHaveReg()); 5813 assert(Slot->mustNotHaveReg());
5834 assert(Slot->getRegNum() == Variable::NoRegister); 5814 assert(Slot->getRegNum() == Variable::NoRegister);
5835 // Compute the location of Loc in memory. 5815 // Compute the location of Loc in memory.
5836 // TODO(wala,stichnot): lea should not 5816 // TODO(wala,stichnot): lea should not
5837 // be required. The address of the stack slot is known at compile time 5817 // be required. The address of the stack slot is known at compile time
5838 // (although not until after addProlog()). 5818 // (although not until after addProlog()).
5839 constexpr Type PointerType = IceType_i32; 5819 constexpr Type PointerType = IceType_i32;
5840 Variable *Loc = makeReg(PointerType); 5820 Variable *Loc = makeReg(PointerType);
5841 _lea(Loc, Slot); 5821 _lea(Loc, Slot);
5842 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5822 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5843 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5823 return X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5844 } 5824 }
5845 5825
5846 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. 5826 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
5847 /// Src is assumed to already be legalized. If the source operand is known to 5827 /// Src is assumed to already be legalized. If the source operand is known to
5848 /// be a memory or immediate operand, a simple mov will suffice. But if the 5828 /// be a memory or immediate operand, a simple mov will suffice. But if the
5849 /// source operand can be a physical register, then it must first be copied into 5829 /// source operand can be a physical register, then it must first be copied into
5850 /// a physical register that is truncable to 8-bit, then truncated into a 5830 /// a physical register that is truncable to 8-bit, then truncated into a
5851 /// physical register that can receive a truncation, and finally copied into the 5831 /// physical register that can receive a truncation, and finally copied into the
5852 /// result 8-bit register (which in general can be any 8-bit register). For 5832 /// result 8-bit register (which in general can be any 8-bit register). For
5853 /// example, moving %ebp into %ah may be accomplished as: 5833 /// example, moving %ebp into %ah may be accomplished as:
(...skipping 10 matching lines...) Expand all
5864 /// Reg_ah. 5844 /// Reg_ah.
5865 /// 5845 ///
5866 /// Note #2. ConstantRelocatable operands are also put through this process 5846 /// Note #2. ConstantRelocatable operands are also put through this process
5867 /// (not truncated directly) because our ELF emitter does R_386_32 relocations 5847 /// (not truncated directly) because our ELF emitter does R_386_32 relocations
5868 /// but not R_386_8 relocations. 5848 /// but not R_386_8 relocations.
5869 /// 5849 ///
5870 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 5850 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8
5871 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper 5851 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
5872 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument 5852 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
5873 /// to the pinsrb instruction. 5853 /// to the pinsrb instruction.
5874 template <class Machine> 5854 template <typename TraitsType>
5875 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) { 5855 Variable *TargetX86Base<TraitsType>::copyToReg8(Operand *Src, int32_t RegNum) {
5876 Type Ty = Src->getType(); 5856 Type Ty = Src->getType();
5877 assert(isScalarIntegerType(Ty)); 5857 assert(isScalarIntegerType(Ty));
5878 assert(Ty != IceType_i1); 5858 assert(Ty != IceType_i1);
5879 Variable *Reg = makeReg(IceType_i8, RegNum); 5859 Variable *Reg = makeReg(IceType_i8, RegNum);
5880 Reg->setRegClass(RCX86_IsTrunc8Rcvr); 5860 Reg->setRegClass(RCX86_IsTrunc8Rcvr);
5881 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { 5861 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
5882 Variable *SrcTruncable = makeReg(Ty); 5862 Variable *SrcTruncable = makeReg(Ty);
5883 switch (Ty) { 5863 switch (Ty) {
5884 case IceType_i64: 5864 case IceType_i64:
5885 SrcTruncable->setRegClass(RCX86_Is64To8); 5865 SrcTruncable->setRegClass(RCX86_Is64To8);
(...skipping 13 matching lines...) Expand all
5899 _mov(SrcTruncable, Src); 5879 _mov(SrcTruncable, Src);
5900 _mov(SrcRcvr, SrcTruncable); 5880 _mov(SrcRcvr, SrcTruncable);
5901 Src = SrcRcvr; 5881 Src = SrcRcvr;
5902 } 5882 }
5903 _mov(Reg, Src); 5883 _mov(Reg, Src);
5904 return Reg; 5884 return Reg;
5905 } 5885 }
5906 5886
5907 /// Helper for legalize() to emit the right code to lower an operand to a 5887 /// Helper for legalize() to emit the right code to lower an operand to a
5908 /// register of the appropriate type. 5888 /// register of the appropriate type.
5909 template <class Machine> 5889 template <typename TraitsType>
5910 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5890 Variable *TargetX86Base<TraitsType>::copyToReg(Operand *Src, int32_t RegNum) {
5911 Type Ty = Src->getType(); 5891 Type Ty = Src->getType();
5912 Variable *Reg = makeReg(Ty, RegNum); 5892 Variable *Reg = makeReg(Ty, RegNum);
5913 if (isVectorType(Ty)) { 5893 if (isVectorType(Ty)) {
5914 _movp(Reg, Src); 5894 _movp(Reg, Src);
5915 } else { 5895 } else {
5916 _mov(Reg, Src); 5896 _mov(Reg, Src);
5917 } 5897 }
5918 return Reg; 5898 return Reg;
5919 } 5899 }
5920 5900
5921 template <class Machine> 5901 template <typename TraitsType>
5922 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, 5902 Operand *TargetX86Base<TraitsType>::legalize(Operand *From, LegalMask Allowed,
5923 int32_t RegNum) { 5903 int32_t RegNum) {
5924 Type Ty = From->getType(); 5904 Type Ty = From->getType();
5925 // Assert that a physical register is allowed. To date, all calls to 5905 // Assert that a physical register is allowed. To date, all calls to
5926 // legalize() allow a physical register. If a physical register needs to be 5906 // legalize() allow a physical register. If a physical register needs to be
5927 // explicitly disallowed, then new code will need to be written to force a 5907 // explicitly disallowed, then new code will need to be written to force a
5928 // spill. 5908 // spill.
5929 assert(Allowed & Legal_Reg); 5909 assert(Allowed & Legal_Reg);
5930 // If we're asking for a specific physical register, make sure we're not 5910 // If we're asking for a specific physical register, make sure we're not
5931 // allowing any other operand kinds. (This could be future work, e.g. allow 5911 // allowing any other operand kinds. (This could be future work, e.g. allow
5932 // the shl shift amount to be either an immediate or in ecx.) 5912 // the shl shift amount to be either an immediate or in ecx.)
5933 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); 5913 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
5934 5914
5935 // Substitute with an available infinite-weight variable if possible. Only do 5915 // Substitute with an available infinite-weight variable if possible. Only do
5936 // this when we are not asking for a specific register, and when the 5916 // this when we are not asking for a specific register, and when the
5937 // substitution is not locked to a specific register, and when the types 5917 // substitution is not locked to a specific register, and when the types
5938 // match, in order to capture the vast majority of opportunities and avoid 5918 // match, in order to capture the vast majority of opportunities and avoid
5939 // corner cases in the lowering. 5919 // corner cases in the lowering.
5940 if (RegNum == Variable::NoRegister) { 5920 if (RegNum == Variable::NoRegister) {
5941 if (Variable *Subst = getContext().availabilityGet(From)) { 5921 if (Variable *Subst = getContext().availabilityGet(From)) {
5942 // At this point we know there is a potential substitution available. 5922 // At this point we know there is a potential substitution available.
5943 if (Subst->mustHaveReg() && !Subst->hasReg()) { 5923 if (Subst->mustHaveReg() && !Subst->hasReg()) {
5944 // At this point we know the substitution will have a register. 5924 // At this point we know the substitution will have a register.
5945 if (From->getType() == Subst->getType()) { 5925 if (From->getType() == Subst->getType()) {
5946 // At this point we know the substitution's register is compatible. 5926 // At this point we know the substitution's register is compatible.
5947 return Subst; 5927 return Subst;
5948 } 5928 }
5949 } 5929 }
5950 } 5930 }
5951 } 5931 }
5952 5932
5953 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 5933 if (auto *Mem = llvm::dyn_cast<X86OperandMem>(From)) {
5954 // Before doing anything with a Mem operand, we need to ensure that the 5934 // Before doing anything with a Mem operand, we need to ensure that the
5955 // Base and Index components are in physical registers. 5935 // Base and Index components are in physical registers.
5956 Variable *Base = Mem->getBase(); 5936 Variable *Base = Mem->getBase();
5957 Variable *Index = Mem->getIndex(); 5937 Variable *Index = Mem->getIndex();
5958 Variable *RegBase = nullptr; 5938 Variable *RegBase = nullptr;
5959 Variable *RegIndex = nullptr; 5939 Variable *RegIndex = nullptr;
5960 if (Base) { 5940 if (Base) {
5961 RegBase = llvm::cast<Variable>( 5941 RegBase = llvm::cast<Variable>(
5962 legalize(Base, Legal_Reg | Legal_Rematerializable)); 5942 legalize(Base, Legal_Reg | Legal_Rematerializable));
5963 } 5943 }
5964 if (Index) { 5944 if (Index) {
5965 RegIndex = llvm::cast<Variable>( 5945 RegIndex = llvm::cast<Variable>(
5966 legalize(Index, Legal_Reg | Legal_Rematerializable)); 5946 legalize(Index, Legal_Reg | Legal_Rematerializable));
5967 } 5947 }
5968 if (Base != RegBase || Index != RegIndex) { 5948 if (Base != RegBase || Index != RegIndex) {
5969 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), 5949 Mem = X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
5970 RegIndex, Mem->getShift(), 5950 Mem->getShift(), Mem->getSegmentRegister());
5971 Mem->getSegmentRegister());
5972 } 5951 }
5973 5952
5974 // For all Memory Operands, we do randomization/pooling here 5953 // For all Memory Operands, we do randomization/pooling here
5975 From = randomizeOrPoolImmediate(Mem); 5954 From = randomizeOrPoolImmediate(Mem);
5976 5955
5977 if (!(Allowed & Legal_Mem)) { 5956 if (!(Allowed & Legal_Mem)) {
5978 From = copyToReg(From, RegNum); 5957 From = copyToReg(From, RegNum);
5979 } 5958 }
5980 return From; 5959 return From;
5981 } 5960 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
6018 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) { 5997 } else if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(Const)) {
6019 if (Utils::isPositiveZero(ConstDouble->getValue())) 5998 if (Utils::isPositiveZero(ConstDouble->getValue()))
6020 return makeZeroedRegister(Ty, RegNum); 5999 return makeZeroedRegister(Ty, RegNum);
6021 } 6000 }
6022 Variable *Base = nullptr; 6001 Variable *Base = nullptr;
6023 std::string Buffer; 6002 std::string Buffer;
6024 llvm::raw_string_ostream StrBuf(Buffer); 6003 llvm::raw_string_ostream StrBuf(Buffer);
6025 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 6004 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
6026 llvm::cast<Constant>(From)->setShouldBePooled(true); 6005 llvm::cast<Constant>(From)->setShouldBePooled(true);
6027 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 6006 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
6028 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 6007 From = X86OperandMem::create(Func, Ty, Base, Offset);
6029 } 6008 }
6030 bool NeedsReg = false; 6009 bool NeedsReg = false;
6031 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) 6010 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
6032 // Immediate specifically not allowed 6011 // Immediate specifically not allowed
6033 NeedsReg = true; 6012 NeedsReg = true;
6034 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 6013 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
6035 // On x86, FP constants are lowered to mem operands. 6014 // On x86, FP constants are lowered to mem operands.
6036 NeedsReg = true; 6015 NeedsReg = true;
6037 if (NeedsReg) { 6016 if (NeedsReg) {
6038 From = copyToReg(From, RegNum); 6017 From = copyToReg(From, RegNum);
(...skipping 11 matching lines...) Expand all
6050 // - Mem is not allowed and Var isn't guaranteed a physical register, or 6029 // - Mem is not allowed and Var isn't guaranteed a physical register, or
6051 // - RegNum is required and Var->getRegNum() doesn't match, or 6030 // - RegNum is required and Var->getRegNum() doesn't match, or
6052 // - Var is a rematerializable variable and rematerializable pass-through is 6031 // - Var is a rematerializable variable and rematerializable pass-through is
6053 // not allowed (in which case we need an lea instruction). 6032 // not allowed (in which case we need an lea instruction).
6054 if (MustRematerialize) { 6033 if (MustRematerialize) {
6055 assert(Ty == IceType_i32); 6034 assert(Ty == IceType_i32);
6056 Variable *NewVar = makeReg(Ty, RegNum); 6035 Variable *NewVar = makeReg(Ty, RegNum);
6057 // Since Var is rematerializable, the offset will be added when the lea is 6036 // Since Var is rematerializable, the offset will be added when the lea is
6058 // emitted. 6037 // emitted.
6059 constexpr Constant *NoOffset = nullptr; 6038 constexpr Constant *NoOffset = nullptr;
6060 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset); 6039 auto *Mem = X86OperandMem::create(Func, Ty, Var, NoOffset);
6061 _lea(NewVar, Mem); 6040 _lea(NewVar, Mem);
6062 From = NewVar; 6041 From = NewVar;
6063 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 6042 } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6064 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) || 6043 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) ||
6065 MustRematerialize) { 6044 MustRematerialize) {
6066 From = copyToReg(From, RegNum); 6045 From = copyToReg(From, RegNum);
6067 } 6046 }
6068 return From; 6047 return From;
6069 } 6048 }
6070 llvm_unreachable("Unhandled operand kind in legalize()"); 6049 llvm_unreachable("Unhandled operand kind in legalize()");
6071 return From; 6050 return From;
6072 } 6051 }
6073 6052
6074 /// Provide a trivial wrapper to legalize() for this common usage. 6053 /// Provide a trivial wrapper to legalize() for this common usage.
6075 template <class Machine> 6054 template <typename TraitsType>
6076 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { 6055 Variable *TargetX86Base<TraitsType>::legalizeToReg(Operand *From,
6056 int32_t RegNum) {
6077 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 6057 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6078 } 6058 }
6079 6059
6080 /// Legalize undef values to concrete values. 6060 /// Legalize undef values to concrete values.
6081 template <class Machine> 6061 template <typename TraitsType>
6082 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { 6062 Operand *TargetX86Base<TraitsType>::legalizeUndef(Operand *From,
6063 int32_t RegNum) {
6083 Type Ty = From->getType(); 6064 Type Ty = From->getType();
6084 if (llvm::isa<ConstantUndef>(From)) { 6065 if (llvm::isa<ConstantUndef>(From)) {
6085 // Lower undefs to zero. Another option is to lower undefs to an 6066 // Lower undefs to zero. Another option is to lower undefs to an
6086 // uninitialized register; however, using an uninitialized register results 6067 // uninitialized register; however, using an uninitialized register results
6087 // in less predictable code. 6068 // in less predictable code.
6088 // 6069 //
6089 // If in the future the implementation is changed to lower undef values to 6070 // If in the future the implementation is changed to lower undef values to
6090 // uninitialized registers, a FakeDef will be needed: 6071 // uninitialized registers, a FakeDef will be needed:
6091 // Context.insert<InstFakeDef>(Reg); 6072 // Context.insert<InstFakeDef>(Reg);
6092 // This is in order to ensure that the live range of Reg is not 6073 // This is in order to ensure that the live range of Reg is not
6093 // overestimated. If the constant being lowered is a 64 bit value, then 6074 // overestimated. If the constant being lowered is a 64 bit value, then
6094 // the result should be split and the lo and hi components will need to go 6075 // the result should be split and the lo and hi components will need to go
6095 // in uninitialized registers. 6076 // in uninitialized registers.
6096 if (isVectorType(Ty)) 6077 if (isVectorType(Ty))
6097 return makeVectorOfZeros(Ty, RegNum); 6078 return makeVectorOfZeros(Ty, RegNum);
6098 return Ctx->getConstantZero(Ty); 6079 return Ctx->getConstantZero(Ty);
6099 } 6080 }
6100 return From; 6081 return From;
6101 } 6082 }
6102 6083
6103 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical 6084 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical
6104 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be 6085 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
6105 /// copied into a physical register. (Actually, either Src0 or Src1 can be 6086 /// copied into a physical register. (Actually, either Src0 or Src1 can be
6106 /// chosen for the physical register, but unfortunately we have to commit to one 6087 /// chosen for the physical register, but unfortunately we have to commit to one
6107 /// or the other before register allocation.) 6088 /// or the other before register allocation.)
6108 template <class Machine> 6089 template <typename TraitsType>
6109 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, 6090 Operand *TargetX86Base<TraitsType>::legalizeSrc0ForCmp(Operand *Src0,
6110 Operand *Src1) { 6091 Operand *Src1) {
6111 bool IsSrc1ImmOrReg = false; 6092 bool IsSrc1ImmOrReg = false;
6112 if (llvm::isa<Constant>(Src1)) { 6093 if (llvm::isa<Constant>(Src1)) {
6113 IsSrc1ImmOrReg = true; 6094 IsSrc1ImmOrReg = true;
6114 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { 6095 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
6115 if (Var->hasReg()) 6096 if (Var->hasReg())
6116 IsSrc1ImmOrReg = true; 6097 IsSrc1ImmOrReg = true;
6117 } 6098 }
6118 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 6099 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
6119 } 6100 }
6120 6101
6121 template <class Machine> 6102 template <typename TraitsType>
6122 typename TargetX86Base<Machine>::Traits::X86OperandMem * 6103 typename TargetX86Base<TraitsType>::X86OperandMem *
6123 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, 6104 TargetX86Base<TraitsType>::formMemoryOperand(Operand *Opnd, Type Ty,
6124 bool DoLegalize) { 6105 bool DoLegalize) {
6125 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); 6106 auto *Mem = llvm::dyn_cast<X86OperandMem>(Opnd);
6126 // It may be the case that address mode optimization already creates an 6107 // It may be the case that address mode optimization already creates an
6127 // Traits::X86OperandMem, so in that case it wouldn't need another level of 6108 // X86OperandMem, so in that case it wouldn't need another level of
6128 // transformation. 6109 // transformation.
6129 if (!Mem) { 6110 if (!Mem) {
6130 auto *Base = llvm::dyn_cast<Variable>(Opnd); 6111 auto *Base = llvm::dyn_cast<Variable>(Opnd);
6131 auto *Offset = llvm::dyn_cast<Constant>(Opnd); 6112 auto *Offset = llvm::dyn_cast<Constant>(Opnd);
6132 assert(Base || Offset); 6113 assert(Base || Offset);
6133 if (Offset) { 6114 if (Offset) {
6134 // During memory operand building, we do not blind or pool the constant 6115 // During memory operand building, we do not blind or pool the constant
6135 // offset, we will work on the whole memory operand later as one entity 6116 // offset, we will work on the whole memory operand later as one entity
6136 // later, this save one instruction. By turning blinding and pooling off, 6117 // later, this save one instruction. By turning blinding and pooling off,
6137 // we guarantee legalize(Offset) will return a Constant*. 6118 // we guarantee legalize(Offset) will return a Constant*.
6138 { 6119 {
6139 BoolFlagSaver B(RandomizationPoolingPaused, true); 6120 BoolFlagSaver B(RandomizationPoolingPaused, true);
6140 6121
6141 Offset = llvm::cast<Constant>(legalize(Offset)); 6122 Offset = llvm::cast<Constant>(legalize(Offset));
6142 } 6123 }
6143 6124
6144 assert(llvm::isa<ConstantInteger32>(Offset) || 6125 assert(llvm::isa<ConstantInteger32>(Offset) ||
6145 llvm::isa<ConstantRelocatable>(Offset)); 6126 llvm::isa<ConstantRelocatable>(Offset));
6146 } 6127 }
6147 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 6128 Mem = X86OperandMem::create(Func, Ty, Base, Offset);
6148 } 6129 }
6149 // Do legalization, which contains randomization/pooling or do 6130 // Do legalization, which contains randomization/pooling or do
6150 // randomization/pooling. 6131 // randomization/pooling.
6151 return llvm::cast<typename Traits::X86OperandMem>( 6132 return llvm::cast<X86OperandMem>(DoLegalize ? legalize(Mem)
6152 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 6133 : randomizeOrPoolImmediate(Mem));
6153 } 6134 }
6154 6135
6155 template <class Machine> 6136 template <typename TraitsType>
6156 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 6137 Variable *TargetX86Base<TraitsType>::makeReg(Type Type, int32_t RegNum) {
6157 // There aren't any 64-bit integer registers for x86-32. 6138 // There aren't any 64-bit integer registers for x86-32.
6158 assert(Traits::Is64Bit || Type != IceType_i64); 6139 assert(Traits::Is64Bit || Type != IceType_i64);
6159 Variable *Reg = Func->makeVariable(Type); 6140 Variable *Reg = Func->makeVariable(Type);
6160 if (RegNum == Variable::NoRegister) 6141 if (RegNum == Variable::NoRegister)
6161 Reg->setMustHaveReg(); 6142 Reg->setMustHaveReg();
6162 else 6143 else
6163 Reg->setRegNum(RegNum); 6144 Reg->setRegNum(RegNum);
6164 return Reg; 6145 return Reg;
6165 } 6146 }
6166 6147
6167 template <class Machine> 6148 template <typename TraitsType>
6168 const Type TargetX86Base<Machine>::TypeForSize[] = { 6149 const Type TargetX86Base<TraitsType>::TypeForSize[] = {
6169 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; 6150 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
6170 template <class Machine> 6151 template <typename TraitsType>
6171 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, 6152 Type TargetX86Base<TraitsType>::largestTypeInSize(uint32_t Size,
6172 uint32_t MaxSize) { 6153 uint32_t MaxSize) {
6173 assert(Size != 0); 6154 assert(Size != 0);
6174 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); 6155 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
6175 uint32_t MaxIndex = MaxSize == NoSizeLimit 6156 uint32_t MaxIndex = MaxSize == NoSizeLimit
6176 ? llvm::array_lengthof(TypeForSize) - 1 6157 ? llvm::array_lengthof(TypeForSize) - 1
6177 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); 6158 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
6178 return TypeForSize[std::min(TyIndex, MaxIndex)]; 6159 return TypeForSize[std::min(TyIndex, MaxIndex)];
6179 } 6160 }
6180 6161
6181 template <class Machine> 6162 template <typename TraitsType>
6182 Type TargetX86Base<Machine>::firstTypeThatFitsSize(uint32_t Size, 6163 Type TargetX86Base<TraitsType>::firstTypeThatFitsSize(uint32_t Size,
6183 uint32_t MaxSize) { 6164 uint32_t MaxSize) {
6184 assert(Size != 0); 6165 assert(Size != 0);
6185 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); 6166 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
6186 if (!llvm::isPowerOf2_32(Size)) 6167 if (!llvm::isPowerOf2_32(Size))
6187 ++TyIndex; 6168 ++TyIndex;
6188 uint32_t MaxIndex = MaxSize == NoSizeLimit 6169 uint32_t MaxIndex = MaxSize == NoSizeLimit
6189 ? llvm::array_lengthof(TypeForSize) - 1 6170 ? llvm::array_lengthof(TypeForSize) - 1
6190 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); 6171 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
6191 return TypeForSize[std::min(TyIndex, MaxIndex)]; 6172 return TypeForSize[std::min(TyIndex, MaxIndex)];
6192 } 6173 }
6193 6174
6194 template <class Machine> void TargetX86Base<Machine>::postLower() { 6175 template <typename TraitsType> void TargetX86Base<TraitsType>::postLower() {
6195 if (Ctx->getFlags().getOptLevel() == Opt_m1) 6176 if (Ctx->getFlags().getOptLevel() == Opt_m1)
6196 return; 6177 return;
6197 markRedefinitions(); 6178 markRedefinitions();
6198 Context.availabilityUpdate(); 6179 Context.availabilityUpdate();
6199 } 6180 }
6200 6181
6201 template <class Machine> 6182 template <typename TraitsType>
6202 void TargetX86Base<Machine>::makeRandomRegisterPermutation( 6183 void TargetX86Base<TraitsType>::makeRandomRegisterPermutation(
6203 llvm::SmallVectorImpl<int32_t> &Permutation, 6184 llvm::SmallVectorImpl<int32_t> &Permutation,
6204 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const { 6185 const llvm::SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
6205 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation, 6186 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
6206 ExcludeRegisters, Salt); 6187 ExcludeRegisters, Salt);
6207 } 6188 }
6208 6189
6209 template <class Machine> 6190 template <typename TraitsType>
6210 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 6191 void TargetX86Base<TraitsType>::emit(const ConstantInteger32 *C) const {
6211 if (!BuildDefs::dump()) 6192 if (!BuildDefs::dump())
6212 return; 6193 return;
6213 Ostream &Str = Ctx->getStrEmit(); 6194 Ostream &Str = Ctx->getStrEmit();
6214 Str << getConstantPrefix() << C->getValue(); 6195 Str << getConstantPrefix() << C->getValue();
6215 } 6196 }
6216 6197
6217 template <class Machine> 6198 template <typename TraitsType>
6218 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const { 6199 void TargetX86Base<TraitsType>::emit(const ConstantInteger64 *C) const {
6219 if (!Traits::Is64Bit) { 6200 if (!Traits::Is64Bit) {
6220 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 6201 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6221 } else { 6202 } else {
6222 if (!BuildDefs::dump()) 6203 if (!BuildDefs::dump())
6223 return; 6204 return;
6224 Ostream &Str = Ctx->getStrEmit(); 6205 Ostream &Str = Ctx->getStrEmit();
6225 Str << getConstantPrefix() << C->getValue(); 6206 Str << getConstantPrefix() << C->getValue();
6226 } 6207 }
6227 } 6208 }
6228 6209
6229 template <class Machine> 6210 template <typename TraitsType>
6230 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { 6211 void TargetX86Base<TraitsType>::emit(const ConstantFloat *C) const {
6231 if (!BuildDefs::dump()) 6212 if (!BuildDefs::dump())
6232 return; 6213 return;
6233 Ostream &Str = Ctx->getStrEmit(); 6214 Ostream &Str = Ctx->getStrEmit();
6234 C->emitPoolLabel(Str, Ctx); 6215 C->emitPoolLabel(Str, Ctx);
6235 } 6216 }
6236 6217
6237 template <class Machine> 6218 template <typename TraitsType>
6238 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const { 6219 void TargetX86Base<TraitsType>::emit(const ConstantDouble *C) const {
6239 if (!BuildDefs::dump()) 6220 if (!BuildDefs::dump())
6240 return; 6221 return;
6241 Ostream &Str = Ctx->getStrEmit(); 6222 Ostream &Str = Ctx->getStrEmit();
6242 C->emitPoolLabel(Str, Ctx); 6223 C->emitPoolLabel(Str, Ctx);
6243 } 6224 }
6244 6225
6245 template <class Machine> 6226 template <typename TraitsType>
6246 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { 6227 void TargetX86Base<TraitsType>::emit(const ConstantUndef *) const {
6247 llvm::report_fatal_error("undef value encountered by emitter."); 6228 llvm::report_fatal_error("undef value encountered by emitter.");
6248 } 6229 }
6249 6230
6250 /// Randomize or pool an Immediate. 6231 /// Randomize or pool an Immediate.
6251 template <class Machine> 6232 template <typename TraitsType>
6252 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, 6233 Operand *
6253 int32_t RegNum) { 6234 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(Constant *Immediate,
6235 int32_t RegNum) {
6254 assert(llvm::isa<ConstantInteger32>(Immediate) || 6236 assert(llvm::isa<ConstantInteger32>(Immediate) ||
6255 llvm::isa<ConstantRelocatable>(Immediate)); 6237 llvm::isa<ConstantRelocatable>(Immediate));
6256 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 6238 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
6257 RandomizationPoolingPaused == true) { 6239 RandomizationPoolingPaused == true) {
6258 // Immediates randomization/pooling off or paused 6240 // Immediates randomization/pooling off or paused
6259 return Immediate; 6241 return Immediate;
6260 } 6242 }
6261 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { 6243 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
6262 Ctx->statsUpdateRPImms(); 6244 Ctx->statsUpdateRPImms();
6263 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 6245 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
6264 RPI_Randomize) { 6246 RPI_Randomize) {
6265 // blind the constant 6247 // blind the constant
6266 // FROM: 6248 // FROM:
6267 // imm 6249 // imm
6268 // TO: 6250 // TO:
6269 // insert: mov imm+cookie, Reg 6251 // insert: mov imm+cookie, Reg
6270 // insert: lea -cookie[Reg], Reg 6252 // insert: lea -cookie[Reg], Reg
6271 // => Reg 6253 // => Reg
6272 // If we have already assigned a phy register, we must come from 6254 // If we have already assigned a phy register, we must come from
6273 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the 6255 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
6274 // assigned register as this assignment is that start of its use-def 6256 // assigned register as this assignment is that start of its use-def
6275 // chain. So we add RegNum argument here. Note we use 'lea' instruction 6257 // chain. So we add RegNum argument here. Note we use 'lea' instruction
6276 // instead of 'xor' to avoid affecting the flags. 6258 // instead of 'xor' to avoid affecting the flags.
6277 Variable *Reg = makeReg(IceType_i32, RegNum); 6259 Variable *Reg = makeReg(IceType_i32, RegNum);
6278 auto *Integer = llvm::cast<ConstantInteger32>(Immediate); 6260 auto *Integer = llvm::cast<ConstantInteger32>(Immediate);
6279 uint32_t Value = Integer->getValue(); 6261 uint32_t Value = Integer->getValue();
6280 uint32_t Cookie = Func->getConstantBlindingCookie(); 6262 uint32_t Cookie = Func->getConstantBlindingCookie();
6281 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); 6263 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
6282 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); 6264 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
6283 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, 6265 _lea(Reg,
6284 nullptr, 0)); 6266 X86OperandMem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
6285 if (Immediate->getType() != IceType_i32) { 6267 if (Immediate->getType() != IceType_i32) {
6286 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); 6268 Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
6287 _mov(TruncReg, Reg); 6269 _mov(TruncReg, Reg);
6288 return TruncReg; 6270 return TruncReg;
6289 } 6271 }
6290 return Reg; 6272 return Reg;
6291 } 6273 }
6292 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 6274 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
6293 // pool the constant 6275 // pool the constant
6294 // FROM: 6276 // FROM:
6295 // imm 6277 // imm
6296 // TO: 6278 // TO:
6297 // insert: mov $label, Reg 6279 // insert: mov $label, Reg
6298 // => Reg 6280 // => Reg
6299 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); 6281 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
6300 Immediate->setShouldBePooled(true); 6282 Immediate->setShouldBePooled(true);
6301 // if we have already assigned a phy register, we must come from 6283 // if we have already assigned a phy register, we must come from
6302 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the 6284 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
6303 // assigned register as this assignment is that start of its use-def 6285 // assigned register as this assignment is that start of its use-def
6304 // chain. So we add RegNum argument here. 6286 // chain. So we add RegNum argument here.
6305 Variable *Reg = makeReg(Immediate->getType(), RegNum); 6287 Variable *Reg = makeReg(Immediate->getType(), RegNum);
6306 IceString Label; 6288 IceString Label;
6307 llvm::raw_string_ostream Label_stream(Label); 6289 llvm::raw_string_ostream Label_stream(Label);
6308 Immediate->emitPoolLabel(Label_stream, Ctx); 6290 Immediate->emitPoolLabel(Label_stream, Ctx);
6309 constexpr RelocOffsetT Offset = 0; 6291 constexpr RelocOffsetT Offset = 0;
6310 constexpr bool SuppressMangling = true; 6292 constexpr bool SuppressMangling = true;
6311 Constant *Symbol = 6293 Constant *Symbol =
6312 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 6294 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
6313 typename Traits::X86OperandMem *MemOperand = 6295 X86OperandMem *MemOperand =
6314 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, 6296 X86OperandMem::create(Func, Immediate->getType(), nullptr, Symbol);
6315 Symbol);
6316 _mov(Reg, MemOperand); 6297 _mov(Reg, MemOperand);
6317 return Reg; 6298 return Reg;
6318 } 6299 }
6319 assert("Unsupported -randomize-pool-immediates option" && false); 6300 assert("Unsupported -randomize-pool-immediates option" && false);
6320 } 6301 }
6321 // the constant Immediate is not eligible for blinding/pooling 6302 // the constant Immediate is not eligible for blinding/pooling
6322 return Immediate; 6303 return Immediate;
6323 } 6304 }
6324 6305
6325 template <class Machine> 6306 template <typename TraitsType>
6326 typename TargetX86Base<Machine>::Traits::X86OperandMem * 6307 typename TargetX86Base<TraitsType>::X86OperandMem *
6327 TargetX86Base<Machine>::randomizeOrPoolImmediate( 6308 TargetX86Base<TraitsType>::randomizeOrPoolImmediate(X86OperandMem *MemOperand,
6328 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { 6309 int32_t RegNum) {
6329 assert(MemOperand); 6310 assert(MemOperand);
6330 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 6311 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
6331 RandomizationPoolingPaused == true) { 6312 RandomizationPoolingPaused == true) {
6332 // immediates randomization/pooling is turned off 6313 // immediates randomization/pooling is turned off
6333 return MemOperand; 6314 return MemOperand;
6334 } 6315 }
6335 6316
6336 // If this memory operand is already a randomized one, we do not randomize it 6317 // If this memory operand is already a randomized one, we do not randomize it
6337 // again. 6318 // again.
6338 if (MemOperand->getRandomized()) 6319 if (MemOperand->getRandomized())
(...skipping 13 matching lines...) Expand all
6352 // => -cookie[RegTemp, index, shift] 6333 // => -cookie[RegTemp, index, shift]
6353 uint32_t Value = 6334 uint32_t Value =
6354 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) 6335 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
6355 ->getValue(); 6336 ->getValue();
6356 uint32_t Cookie = Func->getConstantBlindingCookie(); 6337 uint32_t Cookie = Func->getConstantBlindingCookie();
6357 Constant *Mask1 = Ctx->getConstantInt( 6338 Constant *Mask1 = Ctx->getConstantInt(
6358 MemOperand->getOffset()->getType(), Cookie + Value); 6339 MemOperand->getOffset()->getType(), Cookie + Value);
6359 Constant *Mask2 = 6340 Constant *Mask2 =
6360 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 6341 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
6361 6342
6362 typename Traits::X86OperandMem *TempMemOperand = 6343 X86OperandMem *TempMemOperand = X86OperandMem::create(
6363 Traits::X86OperandMem::create(Func, MemOperand->getType(), 6344 Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
6364 MemOperand->getBase(), Mask1);
6365 // If we have already assigned a physical register, we must come from 6345 // If we have already assigned a physical register, we must come from
6366 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 6346 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
6367 // the assigned register as this assignment is that start of its 6347 // the assigned register as this assignment is that start of its
6368 // use-def chain. So we add RegNum argument here. 6348 // use-def chain. So we add RegNum argument here.
6369 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 6349 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
6370 _lea(RegTemp, TempMemOperand); 6350 _lea(RegTemp, TempMemOperand);
6371 6351
6372 typename Traits::X86OperandMem *NewMemOperand = 6352 X86OperandMem *NewMemOperand = X86OperandMem::create(
6373 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, 6353 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
6374 Mask2, MemOperand->getIndex(), 6354 MemOperand->getShift(), MemOperand->getSegmentRegister());
6375 MemOperand->getShift(),
6376 MemOperand->getSegmentRegister());
6377 6355
6378 // Label this memory operand as randomized, so we won't randomize it 6356 // Label this memory operand as randomized, so we won't randomize it
6379 // again in case we call legalize() multiple times on this memory 6357 // again in case we call legalize() multiple times on this memory
6380 // operand. 6358 // operand.
6381 NewMemOperand->setRandomized(true); 6359 NewMemOperand->setRandomized(true);
6382 return NewMemOperand; 6360 return NewMemOperand;
6383 } 6361 }
6384 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 6362 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
6385 // pool the constant offset 6363 // pool the constant offset
6386 // FROM: 6364 // FROM:
(...skipping 13 matching lines...) Expand all
6400 return MemOperand; 6378 return MemOperand;
6401 Variable *RegTemp = makeReg(IceType_i32); 6379 Variable *RegTemp = makeReg(IceType_i32);
6402 IceString Label; 6380 IceString Label;
6403 llvm::raw_string_ostream Label_stream(Label); 6381 llvm::raw_string_ostream Label_stream(Label);
6404 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); 6382 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
6405 MemOperand->getOffset()->setShouldBePooled(true); 6383 MemOperand->getOffset()->setShouldBePooled(true);
6406 constexpr RelocOffsetT SymOffset = 0; 6384 constexpr RelocOffsetT SymOffset = 0;
6407 constexpr bool SuppressMangling = true; 6385 constexpr bool SuppressMangling = true;
6408 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 6386 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
6409 SuppressMangling); 6387 SuppressMangling);
6410 typename Traits::X86OperandMem *SymbolOperand = 6388 X86OperandMem *SymbolOperand = X86OperandMem::create(
6411 Traits::X86OperandMem::create( 6389 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
6412 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
6413 _mov(RegTemp, SymbolOperand); 6390 _mov(RegTemp, SymbolOperand);
6414 // If we have a base variable here, we should add the lea instruction 6391 // If we have a base variable here, we should add the lea instruction
6415 // to add the value of the base variable to RegTemp. If there is no 6392 // to add the value of the base variable to RegTemp. If there is no
6416 // base variable, we won't need this lea instruction. 6393 // base variable, we won't need this lea instruction.
6417 if (MemOperand->getBase()) { 6394 if (MemOperand->getBase()) {
6418 typename Traits::X86OperandMem *CalculateOperand = 6395 X86OperandMem *CalculateOperand = X86OperandMem::create(
6419 Traits::X86OperandMem::create( 6396 Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
6420 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, 6397 RegTemp, 0, MemOperand->getSegmentRegister());
6421 RegTemp, 0, MemOperand->getSegmentRegister());
6422 _lea(RegTemp, CalculateOperand); 6398 _lea(RegTemp, CalculateOperand);
6423 } 6399 }
6424 typename Traits::X86OperandMem *NewMemOperand = 6400 X86OperandMem *NewMemOperand = X86OperandMem::create(
6425 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, 6401 Func, MemOperand->getType(), RegTemp, nullptr,
6426 nullptr, MemOperand->getIndex(), 6402 MemOperand->getIndex(), MemOperand->getShift(),
6427 MemOperand->getShift(), 6403 MemOperand->getSegmentRegister());
6428 MemOperand->getSegmentRegister());
6429 return NewMemOperand; 6404 return NewMemOperand;
6430 } 6405 }
6431 assert("Unsupported -randomize-pool-immediates option" && false); 6406 assert("Unsupported -randomize-pool-immediates option" && false);
6432 } 6407 }
6433 } 6408 }
6434 // the offset is not eligible for blinding or pooling, return the original 6409 // the offset is not eligible for blinding or pooling, return the original
6435 // mem operand 6410 // mem operand
6436 return MemOperand; 6411 return MemOperand;
6437 } 6412 }
6438 6413
6439 } // end of namespace X86Internal 6414 } // end of namespace X86NAMESPACE
6440 } // end of namespace Ice 6415 } // end of namespace Ice
6441 6416
6442 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6417 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698