Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(668)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1202533003: Extracts a TargetX86Base target which will be used as the common X86{32,64} implementation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX86Base class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. 12 // high-level instruction.
13 // 13 //
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18
16 #include "llvm/Support/MathExtras.h" 19 #include "llvm/Support/MathExtras.h"
17 20
18 #include "IceCfg.h" 21 #include "IceCfg.h"
19 #include "IceCfgNode.h" 22 #include "IceCfgNode.h"
20 #include "IceClFlags.h" 23 #include "IceClFlags.h"
21 #include "IceDefs.h" 24 #include "IceDefs.h"
22 #include "IceELFObjectWriter.h" 25 #include "IceELFObjectWriter.h"
23 #include "IceGlobalInits.h" 26 #include "IceGlobalInits.h"
24 #include "IceInstX8632.h" 27 #include "IceInstX8632.h"
25 #include "IceLiveness.h" 28 #include "IceLiveness.h"
26 #include "IceOperand.h" 29 #include "IceOperand.h"
27 #include "IceRegistersX8632.h" 30 #include "IceRegistersX8632.h"
28 #include "IceTargetLoweringX8632.def" 31 #include "IceTargetLoweringX8632.def"
29 #include "IceTargetLoweringX8632.h" 32 #include "IceTargetLoweringX8632.h"
30 #include "IceUtils.h" 33 #include "IceUtils.h"
31 34
32 namespace Ice { 35 namespace Ice {
33 36 namespace X86Internal {
34 namespace {
35
36 // The following table summarizes the logic for lowering the fcmp
37 // instruction. There is one table entry for each of the 16 conditions.
38 //
39 // The first four columns describe the case when the operands are
40 // floating point scalar values. A comment in lowerFcmp() describes the
41 // lowering template. In the most general case, there is a compare
42 // followed by two conditional branches, because some fcmp conditions
43 // don't map to a single x86 conditional branch. However, in many cases
44 // it is possible to swap the operands in the comparison and have a
45 // single conditional branch. Since it's quite tedious to validate the
46 // table by hand, good execution tests are helpful.
47 //
48 // The last two columns describe the case when the operands are vectors
49 // of floating point values. For most fcmp conditions, there is a clear
50 // mapping to a single x86 cmpps instruction variant. Some fcmp
51 // conditions require special code to handle and these are marked in the
52 // table with a Cmpps_Invalid predicate.
53 const struct TableFcmp_ {
54 uint32_t Default;
55 bool SwapScalarOperands;
56 CondX86::BrCond C1, C2;
57 bool SwapVectorOperands;
58 CondX86::CmppsCond Predicate;
59 } TableFcmp[] = {
60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
62 ,
63 FCMPX8632_TABLE
64 #undef X
65 };
66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
67
68 // The following table summarizes the logic for lowering the icmp instruction
69 // for i32 and narrower types. Each icmp condition has a clear mapping to an
70 // x86 conditional branch instruction.
71
72 const struct TableIcmp32_ {
73 CondX86::BrCond Mapping;
74 } TableIcmp32[] = {
75 #define X(val, C_32, C1_64, C2_64, C3_64) \
76 { CondX86::C_32 } \
77 ,
78 ICMPX8632_TABLE
79 #undef X
80 };
81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
82
83 // The following table summarizes the logic for lowering the icmp instruction
84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
85 // conditional branches are needed. For the other conditions, three separate
86 // conditional branches are needed.
87 const struct TableIcmp64_ {
88 CondX86::BrCond C1, C2, C3;
89 } TableIcmp64[] = {
90 #define X(val, C_32, C1_64, C2_64, C3_64) \
91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
92 ,
93 ICMPX8632_TABLE
94 #undef X
95 };
96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
97
98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
99 size_t Index = static_cast<size_t>(Cond);
100 assert(Index < TableIcmp32Size);
101 return TableIcmp32[Index].Mapping;
102 }
103
104 const struct TableTypeX8632Attributes_ {
105 Type InVectorElementType;
106 } TableTypeX8632Attributes[] = {
107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \
108 { elementty } \
109 ,
110 ICETYPEX8632_TABLE
111 #undef X
112 };
113 const size_t TableTypeX8632AttributesSize =
114 llvm::array_lengthof(TableTypeX8632Attributes);
115
116 // Return the type which the elements of the vector have in the X86
117 // representation of the vector.
118 Type getInVectorElementType(Type Ty) {
119 assert(isVectorType(Ty));
120 size_t Index = static_cast<size_t>(Ty);
121 (void)Index;
122 assert(Index < TableTypeX8632AttributesSize);
123 return TableTypeX8632Attributes[Ty].InVectorElementType;
124 }
125
126 // The maximum number of arguments to pass in XMM registers
127 const uint32_t X86_MAX_XMM_ARGS = 4;
128 // The number of bits in a byte
129 const uint32_t X86_CHAR_BIT = 8;
130 // Stack alignment
131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
132 // Size of the return address on the stack
133 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
134 // The number of different NOP instructions
135 const uint32_t X86_NUM_NOP_VARIANTS = 5;
136
137 // Value is in bytes. Return Value adjusted to the next highest multiple
138 // of the stack alignment.
139 uint32_t applyStackAlignment(uint32_t Value) {
140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
141 }
142
143 // In some cases, there are x-macros tables for both high-level and
144 // low-level instructions/operands that use the same enum key value.
145 // The tables are kept separate to maintain a proper separation
146 // between abstraction layers. There is a risk that the tables could
147 // get out of sync if enum values are reordered or if entries are
148 // added or deleted. The following dummy namespaces use
149 // static_asserts to ensure everything is kept in sync.
150
151 // Validate the enum values in FCMPX8632_TABLE.
152 namespace dummy1 {
153 // Define a temporary set of enum values based on low-level table
154 // entries.
155 enum _tmp_enum {
156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
157 FCMPX8632_TABLE
158 #undef X
159 _num
160 };
161 // Define a set of constants based on high-level table entries.
162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
163 ICEINSTFCMP_TABLE
164 #undef X
165 // Define a set of constants based on low-level table entries, and
166 // ensure the table entry keys are consistent.
167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
168 static const int _table2_##val = _tmp_##val; \
169 static_assert( \
170 _table1_##val == _table2_##val, \
171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
172 FCMPX8632_TABLE
173 #undef X
174 // Repeat the static asserts with respect to the high-level table
175 // entries in case the high-level table has extra entries.
176 #define X(tag, str) \
177 static_assert( \
178 _table1_##tag == _table2_##tag, \
179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
180 ICEINSTFCMP_TABLE
181 #undef X
182 } // end of namespace dummy1
183
184 // Validate the enum values in ICMPX8632_TABLE.
185 namespace dummy2 {
186 // Define a temporary set of enum values based on low-level table
187 // entries.
188 enum _tmp_enum {
189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
190 ICMPX8632_TABLE
191 #undef X
192 _num
193 };
194 // Define a set of constants based on high-level table entries.
195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
196 ICEINSTICMP_TABLE
197 #undef X
198 // Define a set of constants based on low-level table entries, and
199 // ensure the table entry keys are consistent.
200 #define X(val, C_32, C1_64, C2_64, C3_64) \
201 static const int _table2_##val = _tmp_##val; \
202 static_assert( \
203 _table1_##val == _table2_##val, \
204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
205 ICMPX8632_TABLE
206 #undef X
207 // Repeat the static asserts with respect to the high-level table
208 // entries in case the high-level table has extra entries.
209 #define X(tag, str) \
210 static_assert( \
211 _table1_##tag == _table2_##tag, \
212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
213 ICEINSTICMP_TABLE
214 #undef X
215 } // end of namespace dummy2
216
217 // Validate the enum values in ICETYPEX8632_TABLE.
218 namespace dummy3 {
219 // Define a temporary set of enum values based on low-level table
220 // entries.
221 enum _tmp_enum {
222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
223 ICETYPEX8632_TABLE
224 #undef X
225 _num
226 };
227 // Define a set of constants based on high-level table entries.
228 #define X(tag, size, align, elts, elty, str) \
229 static const int _table1_##tag = tag;
230 ICETYPE_TABLE
231 #undef X
232 // Define a set of constants based on low-level table entries, and
233 // ensure the table entry keys are consistent.
234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \
235 static const int _table2_##tag = _tmp_##tag; \
236 static_assert(_table1_##tag == _table2_##tag, \
237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
238 ICETYPEX8632_TABLE
239 #undef X
240 // Repeat the static asserts with respect to the high-level table
241 // entries in case the high-level table has extra entries.
242 #define X(tag, size, align, elts, elty, str) \
243 static_assert(_table1_##tag == _table2_##tag, \
244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
245 ICETYPE_TABLE
246 #undef X
247 } // end of namespace dummy3
248 37
249 // A helper class to ease the settings of RandomizationPoolingPause 38 // A helper class to ease the settings of RandomizationPoolingPause
250 // to disable constant blinding or pooling for some translation phases. 39 // to disable constant blinding or pooling for some translation phases.
251 class BoolFlagSaver { 40 class BoolFlagSaver {
252 BoolFlagSaver() = delete; 41 BoolFlagSaver() = delete;
253 BoolFlagSaver(const BoolFlagSaver &) = delete; 42 BoolFlagSaver(const BoolFlagSaver &) = delete;
254 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
255 44
256 public: 45 public:
257 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
258 ~BoolFlagSaver() { Flag = OldValue; } 47 ~BoolFlagSaver() { Flag = OldValue; }
259 48
260 private: 49 private:
261 const bool OldValue; 50 const bool OldValue;
262 bool &Flag; 51 bool &Flag;
263 }; 52 };
264 53
265 } // end of anonymous namespace 54 template <class MachineTraits> class BoolFoldingEntry {
55 BoolFoldingEntry(const BoolFoldingEntry &) = delete;
266 56
267 BoolFoldingEntry::BoolFoldingEntry(Inst *I) 57 public:
268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {} 58 BoolFoldingEntry() = default;
59 explicit BoolFoldingEntry(Inst *I);
60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
61 // Instr is the instruction producing the i1-type variable of interest.
62 Inst *Instr = nullptr;
63 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
64 bool IsComplex = false;
65 // IsLiveOut is initialized conservatively to true, and is set to false when
66 // we encounter an instruction that ends Var's live range. We disable the
67 // folding optimization when Var is live beyond this basic block. Note that
68 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
69 // always be true and the folding optimization will never be performed.
70 bool IsLiveOut = true;
71 // NumUses counts the number of times Var is used as a source operand in the
72 // basic block. If IsComplex is true and there is more than one use of Var,
73 // then the folding optimization is disabled for Var.
74 uint32_t NumUses = 0;
75 };
269 76
270 BoolFolding::BoolFoldingProducerKind 77 template <class MachineTraits> class BoolFolding {
271 BoolFolding::getProducerKind(const Inst *Instr) { 78 public:
79 enum BoolFoldingProducerKind {
80 PK_None,
81 PK_Icmp32,
82 PK_Icmp64,
83 PK_Fcmp,
84 PK_Trunc
85 };
86
87 // Currently the actual enum values are not used
88 // (other than CK_None), but we go ahead and
89 // produce them anyway for symmetry with
90 // BoolFoldingProducerKind.
91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
92
93 private:
94 BoolFolding(const BoolFolding &) = delete;
95 BoolFolding &operator=(const BoolFolding &) = delete;
96
97 public:
98 BoolFolding() = default;
99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
101 static bool hasComplexLowering(const Inst *Instr);
102 void init(CfgNode *Node);
103 const Inst *getProducerFor(const Operand *Opnd) const;
104 void dump(const Cfg *Func) const;
105
106 private:
107 // Returns true if Producers contains a valid entry for the given VarNum.
108 bool containsValid(SizeT VarNum) const {
109 auto Element = Producers.find(VarNum);
110 return Element != Producers.end() && Element->second.Instr != nullptr;
111 }
112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
113 // Producers maps Variable::Number to a BoolFoldingEntry.
114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
115 };
116
117 template <class MachineTraits>
118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
120
121 template <class MachineTraits>
122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
272 if (llvm::isa<InstIcmp>(Instr)) { 124 if (llvm::isa<InstIcmp>(Instr)) {
273 if (Instr->getSrc(0)->getType() != IceType_i64) 125 if (Instr->getSrc(0)->getType() != IceType_i64)
274 return PK_Icmp32; 126 return PK_Icmp32;
275 return PK_None; // TODO(stichnot): actually PK_Icmp64; 127 return PK_None; // TODO(stichnot): actually PK_Icmp64;
276 } 128 }
277 return PK_None; // TODO(stichnot): remove this 129 return PK_None; // TODO(stichnot): remove this
278 130
279 if (llvm::isa<InstFcmp>(Instr)) 131 if (llvm::isa<InstFcmp>(Instr))
280 return PK_Fcmp; 132 return PK_Fcmp;
281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 133 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
282 switch (Cast->getCastKind()) { 134 switch (Cast->getCastKind()) {
283 default: 135 default:
284 return PK_None; 136 return PK_None;
285 case InstCast::Trunc: 137 case InstCast::Trunc:
286 return PK_Trunc; 138 return PK_Trunc;
287 } 139 }
288 } 140 }
289 return PK_None; 141 return PK_None;
290 } 142 }
291 143
292 BoolFolding::BoolFoldingConsumerKind 144 template <class MachineTraits>
293 BoolFolding::getConsumerKind(const Inst *Instr) { 145 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
146 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
294 if (llvm::isa<InstBr>(Instr)) 147 if (llvm::isa<InstBr>(Instr))
295 return CK_Br; 148 return CK_Br;
296 if (llvm::isa<InstSelect>(Instr)) 149 if (llvm::isa<InstSelect>(Instr))
297 return CK_Select; 150 return CK_Select;
298 return CK_None; // TODO(stichnot): remove this 151 return CK_None; // TODO(stichnot): remove this
299 152
300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 153 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
301 switch (Cast->getCastKind()) { 154 switch (Cast->getCastKind()) {
302 default: 155 default:
303 return CK_None; 156 return CK_None;
304 case InstCast::Sext: 157 case InstCast::Sext:
305 return CK_Sext; 158 return CK_Sext;
306 case InstCast::Zext: 159 case InstCast::Zext:
307 return CK_Zext; 160 return CK_Zext;
308 } 161 }
309 } 162 }
310 return CK_None; 163 return CK_None;
311 } 164 }
312 165
313 // Returns true if the producing instruction has a "complex" lowering 166 // Returns true if the producing instruction has a "complex" lowering
314 // sequence. This generally means that its lowering sequence requires 167 // sequence. This generally means that its lowering sequence requires
315 // more than one conditional branch, namely 64-bit integer compares 168 // more than one conditional branch, namely 64-bit integer compares
316 // and some floating-point compares. When this is true, and there is 169 // and some floating-point compares. When this is true, and there is
317 // more than one consumer, we prefer to disable the folding 170 // more than one consumer, we prefer to disable the folding
318 // optimization because it minimizes branches. 171 // optimization because it minimizes branches.
319 bool BoolFolding::hasComplexLowering(const Inst *Instr) { 172 template <class MachineTraits>
173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
320 switch (getProducerKind(Instr)) { 174 switch (getProducerKind(Instr)) {
321 default: 175 default:
322 return false; 176 return false;
323 case PK_Icmp64: 177 case PK_Icmp64:
324 return true; 178 return true;
325 case PK_Fcmp: 179 case PK_Fcmp:
326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != 180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
327 CondX86::Br_None; 181 .C2 != CondX86::Br_None;
328 } 182 }
329 } 183 }
330 184
331 void BoolFolding::init(CfgNode *Node) { 185 template <class MachineTraits>
186 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
332 Producers.clear(); 187 Producers.clear();
333 for (Inst &Instr : Node->getInsts()) { 188 for (Inst &Instr : Node->getInsts()) {
334 // Check whether Instr is a valid producer. 189 // Check whether Instr is a valid producer.
335 Variable *Var = Instr.getDest(); 190 Variable *Var = Instr.getDest();
336 if (!Instr.isDeleted() // only consider non-deleted instructions 191 if (!Instr.isDeleted() // only consider non-deleted instructions
337 && Var // only instructions with an actual dest var 192 && Var // only instructions with an actual dest var
338 && Var->getType() == IceType_i1 // only bool-type dest vars 193 && Var->getType() == IceType_i1 // only bool-type dest vars
339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions 194 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); 195 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
341 } 196 }
342 // Check each src variable against the map. 197 // Check each src variable against the map.
343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { 198 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
344 Operand *Src = Instr.getSrc(I); 199 Operand *Src = Instr.getSrc(I);
345 SizeT NumVars = Src->getNumVars(); 200 SizeT NumVars = Src->getNumVars();
346 for (SizeT J = 0; J < NumVars; ++J) { 201 for (SizeT J = 0; J < NumVars; ++J) {
347 const Variable *Var = Src->getVar(J); 202 const Variable *Var = Src->getVar(J);
348 SizeT VarNum = Var->getIndex(); 203 SizeT VarNum = Var->getIndex();
349 if (containsValid(VarNum)) { 204 if (containsValid(VarNum)) {
350 if (I != 0 // All valid consumers use Var as the first source operand 205 if (I != 0 // All valid consumers use Var as the first source operand
(...skipping 21 matching lines...) Expand all
372 continue; 227 continue;
373 } 228 }
374 // Mark as "dead" rather than outright deleting. This is so that 229 // Mark as "dead" rather than outright deleting. This is so that
375 // other peephole style optimizations during or before lowering 230 // other peephole style optimizations during or before lowering
376 // have access to this instruction in undeleted form. See for 231 // have access to this instruction in undeleted form. See for
377 // example tryOptimizedCmpxchgCmpBr(). 232 // example tryOptimizedCmpxchgCmpBr().
378 I.second.Instr->setDead(); 233 I.second.Instr->setDead();
379 } 234 }
380 } 235 }
381 236
382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { 237 template <class MachineTraits>
238 const Inst *
239 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
383 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 240 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
384 if (Var == nullptr) 241 if (Var == nullptr)
385 return nullptr; 242 return nullptr;
386 SizeT VarNum = Var->getIndex(); 243 SizeT VarNum = Var->getIndex();
387 auto Element = Producers.find(VarNum); 244 auto Element = Producers.find(VarNum);
388 if (Element == Producers.end()) 245 if (Element == Producers.end())
389 return nullptr; 246 return nullptr;
390 return Element->second.Instr; 247 return Element->second.Instr;
391 } 248 }
392 249
393 void BoolFolding::dump(const Cfg *Func) const { 250 template <class MachineTraits>
251 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) 252 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
395 return; 253 return;
396 OstreamLocker L(Func->getContext()); 254 OstreamLocker L(Func->getContext());
397 Ostream &Str = Func->getContext()->getStrDump(); 255 Ostream &Str = Func->getContext()->getStrDump();
398 for (auto &I : Producers) { 256 for (auto &I : Producers) {
399 if (I.second.Instr == nullptr) 257 if (I.second.Instr == nullptr)
400 continue; 258 continue;
401 Str << "Found foldable producer:\n "; 259 Str << "Found foldable producer:\n ";
402 I.second.Instr->dump(Func); 260 I.second.Instr->dump(Func);
403 Str << "\n"; 261 Str << "\n";
404 } 262 }
405 } 263 }
406 264
407 void TargetX8632::initNodeForLowering(CfgNode *Node) { 265 template <class Machine>
266 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
408 FoldingInfo.init(Node); 267 FoldingInfo.init(Node);
409 FoldingInfo.dump(Func); 268 FoldingInfo.dump(Func);
410 } 269 }
411 270
412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) { 271 template <class Machine>
413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == 272 TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
414 (TargetInstructionSet::X86InstructionSet_End - 273 : Machine(Func) {
415 TargetInstructionSet::X86InstructionSet_Begin), 274 static_assert(
416 "X86InstructionSet range different from TargetInstructionSet"); 275 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
276 (TargetInstructionSet::X86InstructionSet_End -
277 TargetInstructionSet::X86InstructionSet_Begin),
278 "Traits::InstructionSet range different from TargetInstructionSet");
417 if (Func->getContext()->getFlags().getTargetInstructionSet() != 279 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
418 TargetInstructionSet::BaseInstructionSet) { 280 TargetInstructionSet::BaseInstructionSet) {
419 InstructionSet = static_cast<X86InstructionSet>( 281 InstructionSet = static_cast<typename Traits::InstructionSet>(
420 (Func->getContext()->getFlags().getTargetInstructionSet() - 282 (Func->getContext()->getFlags().getTargetInstructionSet() -
421 TargetInstructionSet::X86InstructionSet_Begin) + 283 TargetInstructionSet::X86InstructionSet_Begin) +
422 X86InstructionSet::Begin); 284 Traits::InstructionSet::Begin);
423 } 285 }
424 // TODO: Don't initialize IntegerRegisters and friends every time. 286 // TODO: Don't initialize IntegerRegisters and friends every time.
425 // Instead, initialize in some sort of static initializer for the 287 // Instead, initialize in some sort of static initializer for the
426 // class. 288 // class.
427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); 289 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); 290 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); 291 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); 292 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); 293 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
432 ScratchRegs.resize(RegX8632::Reg_NUM); 294 ScratchRegs.resize(RegX8632::Reg_NUM);
(...skipping 16 matching lines...) Expand all
449 TypeToRegisterSet[IceType_f64] = FloatRegisters; 311 TypeToRegisterSet[IceType_f64] = FloatRegisters;
450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 312 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 313 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; 314 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; 315 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; 316 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; 317 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 318 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
457 } 319 }
458 320
459 void TargetX8632::translateO2() { 321 template <class Machine> void TargetX86Base<Machine>::translateO2() {
460 TimerMarker T(TimerStack::TT_O2, Func); 322 TimerMarker T(TimerStack::TT_O2, Func);
461 323
462 if (!Ctx->getFlags().getPhiEdgeSplit()) { 324 if (!Ctx->getFlags().getPhiEdgeSplit()) {
463 // Lower Phi instructions. 325 // Lower Phi instructions.
464 Func->placePhiLoads(); 326 Func->placePhiLoads();
465 if (Func->hasError()) 327 if (Func->hasError())
466 return; 328 return;
467 Func->placePhiStores(); 329 Func->placePhiStores();
468 if (Func->hasError()) 330 if (Func->hasError())
469 return; 331 return;
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
561 // needed for searching for opportunities. 423 // needed for searching for opportunities.
562 Func->doBranchOpt(); 424 Func->doBranchOpt();
563 Func->dump("After branch optimization"); 425 Func->dump("After branch optimization");
564 426
565 // Nop insertion 427 // Nop insertion
566 if (Ctx->getFlags().shouldDoNopInsertion()) { 428 if (Ctx->getFlags().shouldDoNopInsertion()) {
567 Func->doNopInsertion(); 429 Func->doNopInsertion();
568 } 430 }
569 } 431 }
570 432
571 void TargetX8632::translateOm1() { 433 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
572 TimerMarker T(TimerStack::TT_Om1, Func); 434 TimerMarker T(TimerStack::TT_Om1, Func);
573 435
574 Func->placePhiLoads(); 436 Func->placePhiLoads();
575 if (Func->hasError()) 437 if (Func->hasError())
576 return; 438 return;
577 Func->placePhiStores(); 439 Func->placePhiStores();
578 if (Func->hasError()) 440 if (Func->hasError())
579 return; 441 return;
580 Func->deletePhis(); 442 Func->deletePhis();
581 if (Func->hasError()) 443 if (Func->hasError())
(...skipping 16 matching lines...) Expand all
598 if (Func->hasError()) 460 if (Func->hasError())
599 return; 461 return;
600 Func->dump("After stack frame mapping"); 462 Func->dump("After stack frame mapping");
601 463
602 // Nop insertion 464 // Nop insertion
603 if (Ctx->getFlags().shouldDoNopInsertion()) { 465 if (Ctx->getFlags().shouldDoNopInsertion()) {
604 Func->doNopInsertion(); 466 Func->doNopInsertion();
605 } 467 }
606 } 468 }
607 469
608 namespace {
609
610 bool canRMW(const InstArithmetic *Arith) { 470 bool canRMW(const InstArithmetic *Arith) {
611 Type Ty = Arith->getDest()->getType(); 471 Type Ty = Arith->getDest()->getType();
612 // X86 vector instructions write to a register and have no RMW 472 // X86 vector instructions write to a register and have no RMW
613 // option. 473 // option.
614 if (isVectorType(Ty)) 474 if (isVectorType(Ty))
615 return false; 475 return false;
616 bool isI64 = Ty == IceType_i64; 476 bool isI64 = Ty == IceType_i64;
617 477
618 switch (Arith->getOp()) { 478 switch (Arith->getOp()) {
619 // Not handled for lack of simple lowering: 479 // Not handled for lack of simple lowering:
(...skipping 25 matching lines...) Expand all
645 return MemA->getBase() == MemB->getBase() && 505 return MemA->getBase() == MemB->getBase() &&
646 MemA->getOffset() == MemB->getOffset() && 506 MemA->getOffset() == MemB->getOffset() &&
647 MemA->getIndex() == MemB->getIndex() && 507 MemA->getIndex() == MemB->getIndex() &&
648 MemA->getShift() == MemB->getShift() && 508 MemA->getShift() == MemB->getShift() &&
649 MemA->getSegmentRegister() == MemB->getSegmentRegister(); 509 MemA->getSegmentRegister() == MemB->getSegmentRegister();
650 } 510 }
651 } 511 }
652 return false; 512 return false;
653 } 513 }
654 514
655 } // end of anonymous namespace 515 template <class Machine> void TargetX86Base<Machine>::findRMW() {
656
657 void TargetX8632::findRMW() {
658 Func->dump("Before RMW"); 516 Func->dump("Before RMW");
659 OstreamLocker L(Func->getContext()); 517 OstreamLocker L(Func->getContext());
660 Ostream &Str = Func->getContext()->getStrDump(); 518 Ostream &Str = Func->getContext()->getStrDump();
661 for (CfgNode *Node : Func->getNodes()) { 519 for (CfgNode *Node : Func->getNodes()) {
662 // Walk through the instructions, considering each sequence of 3 520 // Walk through the instructions, considering each sequence of 3
663 // instructions, and look for the particular RMW pattern. Note that this 521 // instructions, and look for the particular RMW pattern. Note that this
664 // search can be "broken" (false negatives) if there are intervening deleted 522 // search can be "broken" (false negatives) if there are intervening deleted
665 // instructions, or intervening instructions that could be safely moved out 523 // instructions, or intervening instructions that could be safely moved out
666 // of the way to reveal an RMW pattern. 524 // of the way to reveal an RMW pattern.
667 auto E = Node->getInsts().end(); 525 auto E = Node->getInsts().end();
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( 597 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); 598 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
741 Node->getInsts().insert(I3, RMW); 599 Node->getInsts().insert(I3, RMW);
742 } 600 }
743 } 601 }
744 } 602 }
745 } 603 }
746 } 604 }
747 } 605 }
748 606
749 namespace {
750
751 // Converts a ConstantInteger32 operand into its constant value, or 607 // Converts a ConstantInteger32 operand into its constant value, or
752 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 608 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
753 uint64_t getConstantMemoryOrder(Operand *Opnd) { 609 uint64_t getConstantMemoryOrder(Operand *Opnd) {
754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 610 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
755 return Integer->getValue(); 611 return Integer->getValue();
756 return Intrinsics::MemoryOrderInvalid; 612 return Intrinsics::MemoryOrderInvalid;
757 } 613 }
758 614
759 // Determines whether the dest of a Load instruction can be folded 615 // Determines whether the dest of a Load instruction can be folded
760 // into one of the src operands of a 2-operand instruction. This is 616 // into one of the src operands of a 2-operand instruction. This is
761 // true as long as the load dest matches exactly one of the binary 617 // true as long as the load dest matches exactly one of the binary
762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if 618 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
763 // the answer is true. 619 // the answer is true.
764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 620 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
765 Operand *&Src0, Operand *&Src1) { 621 Operand *&Src0, Operand *&Src1) {
766 if (Src0 == LoadDest && Src1 != LoadDest) { 622 if (Src0 == LoadDest && Src1 != LoadDest) {
767 Src0 = LoadSrc; 623 Src0 = LoadSrc;
768 return true; 624 return true;
769 } 625 }
770 if (Src0 != LoadDest && Src1 == LoadDest) { 626 if (Src0 != LoadDest && Src1 == LoadDest) {
771 Src1 = LoadSrc; 627 Src1 = LoadSrc;
772 return true; 628 return true;
773 } 629 }
774 return false; 630 return false;
775 } 631 }
776 632
777 } // end of anonymous namespace 633 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
778
779 void TargetX8632::doLoadOpt() {
780 for (CfgNode *Node : Func->getNodes()) { 634 for (CfgNode *Node : Func->getNodes()) {
781 Context.init(Node); 635 Context.init(Node);
782 while (!Context.atEnd()) { 636 while (!Context.atEnd()) {
783 Variable *LoadDest = nullptr; 637 Variable *LoadDest = nullptr;
784 Operand *LoadSrc = nullptr; 638 Operand *LoadSrc = nullptr;
785 Inst *CurInst = Context.getCur(); 639 Inst *CurInst = Context.getCur();
786 Inst *Next = Context.getNextInst(); 640 Inst *Next = Context.getNextInst();
787 // Determine whether the current instruction is a Load 641 // Determine whether the current instruction is a Load
788 // instruction or equivalent. 642 // instruction or equivalent.
789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 643 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
859 NewInst->spliceLivenessInfo(Next, CurInst); 713 NewInst->spliceLivenessInfo(Next, CurInst);
860 } 714 }
861 } 715 }
862 Context.advanceCur(); 716 Context.advanceCur();
863 Context.advanceNext(); 717 Context.advanceNext();
864 } 718 }
865 } 719 }
866 Func->dump("After load optimization"); 720 Func->dump("After load optimization");
867 } 721 }
868 722
869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { 723 template <class Machine>
724 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 725 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
871 return Br->optimizeBranch(NextNode); 726 return Br->optimizeBranch(NextNode);
872 } 727 }
873 return false; 728 return false;
874 } 729 }
875 730
876 IceString TargetX8632::RegNames[] = { 731 template <class Machine>
732 IceString TargetX86Base<Machine>::RegNames[] = {
877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 733 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
878 frameptr, isI8, isInt, isFP) \ 734 frameptr, isI8, isInt, isFP) \
879 name, 735 name,
880 REGX8632_TABLE 736 REGX8632_TABLE
881 #undef X 737 #undef X
882 }; 738 };
883 739
884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) { 740 template <class Machine>
741 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
885 if (Ty == IceType_void) 742 if (Ty == IceType_void)
886 Ty = IceType_i32; 743 Ty = IceType_i32;
887 if (PhysicalRegisters[Ty].empty()) 744 if (PhysicalRegisters[Ty].empty())
888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); 745 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
889 assert(RegNum < PhysicalRegisters[Ty].size()); 746 assert(RegNum < PhysicalRegisters[Ty].size());
890 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 747 Variable *Reg = PhysicalRegisters[Ty][RegNum];
891 if (Reg == nullptr) { 748 if (Reg == nullptr) {
892 Reg = Func->makeVariable(Ty); 749 Reg = Func->makeVariable(Ty);
893 Reg->setRegNum(RegNum); 750 Reg->setRegNum(RegNum);
894 PhysicalRegisters[Ty][RegNum] = Reg; 751 PhysicalRegisters[Ty][RegNum] = Reg;
895 // Specially mark esp as an "argument" so that it is considered 752 // Specially mark esp as an "argument" so that it is considered
896 // live upon function entry. 753 // live upon function entry.
897 if (RegNum == RegX8632::Reg_esp) { 754 if (RegNum == RegX8632::Reg_esp) {
898 Func->addImplicitArg(Reg); 755 Func->addImplicitArg(Reg);
899 Reg->setIgnoreLiveness(); 756 Reg->setIgnoreLiveness();
900 } 757 }
901 } 758 }
902 return Reg; 759 return Reg;
903 } 760 }
904 761
905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { 762 template <class Machine>
763 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
906 assert(RegNum < RegX8632::Reg_NUM); 764 assert(RegNum < RegX8632::Reg_NUM);
907 static IceString RegNames8[] = { 765 static IceString RegNames8[] = {
908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 766 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
909 frameptr, isI8, isInt, isFP) \ 767 frameptr, isI8, isInt, isFP) \
910 name8, 768 name8,
911 REGX8632_TABLE 769 REGX8632_TABLE
912 #undef X 770 #undef X
913 }; 771 };
914 static IceString RegNames16[] = { 772 static IceString RegNames16[] = {
915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 773 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
916 frameptr, isI8, isInt, isFP) \ 774 frameptr, isI8, isInt, isFP) \
917 name16, 775 name16,
918 REGX8632_TABLE 776 REGX8632_TABLE
919 #undef X 777 #undef X
920 }; 778 };
921 switch (Ty) { 779 switch (Ty) {
922 case IceType_i1: 780 case IceType_i1:
923 case IceType_i8: 781 case IceType_i8:
924 return RegNames8[RegNum]; 782 return RegNames8[RegNum];
925 case IceType_i16: 783 case IceType_i16:
926 return RegNames16[RegNum]; 784 return RegNames16[RegNum];
927 default: 785 default:
928 return RegNames[RegNum]; 786 return RegNames[RegNum];
929 } 787 }
930 } 788 }
931 789
932 void TargetX8632::emitVariable(const Variable *Var) const { 790 template <class Machine>
791 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
933 Ostream &Str = Ctx->getStrEmit(); 792 Ostream &Str = Ctx->getStrEmit();
934 if (Var->hasReg()) { 793 if (Var->hasReg()) {
935 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 794 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
936 return; 795 return;
937 } 796 }
938 if (Var->getWeight().isInf()) { 797 if (Var->getWeight().isInf()) {
939 llvm_unreachable("Infinite-weight Variable has no register assigned"); 798 llvm_unreachable("Infinite-weight Variable has no register assigned");
940 } 799 }
941 int32_t Offset = Var->getStackOffset(); 800 int32_t Offset = Var->getStackOffset();
942 if (!hasFramePointer()) 801 if (!hasFramePointer())
943 Offset += getStackAdjustment(); 802 Offset += getStackAdjustment();
944 if (Offset) 803 if (Offset)
945 Str << Offset; 804 Str << Offset;
946 const Type FrameSPTy = IceType_i32; 805 const Type FrameSPTy = IceType_i32;
947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; 806 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
948 } 807 }
949 808
950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const { 809 template <class Machine>
810 X8632::Address
811 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
951 if (Var->hasReg()) 812 if (Var->hasReg())
952 llvm_unreachable("Stack Variable has a register assigned"); 813 llvm_unreachable("Stack Variable has a register assigned");
953 if (Var->getWeight().isInf()) { 814 if (Var->getWeight().isInf()) {
954 llvm_unreachable("Infinite-weight Variable has no register assigned"); 815 llvm_unreachable("Infinite-weight Variable has no register assigned");
955 } 816 }
956 int32_t Offset = Var->getStackOffset(); 817 int32_t Offset = Var->getStackOffset();
957 if (!hasFramePointer()) 818 if (!hasFramePointer())
958 Offset += getStackAdjustment(); 819 Offset += getStackAdjustment();
959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); 820 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
960 } 821 }
961 822
962 void TargetX8632::lowerArguments() { 823 template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
963 VarList &Args = Func->getArgs(); 824 VarList &Args = Func->getArgs();
964 // The first four arguments of vector type, regardless of their 825 // The first four arguments of vector type, regardless of their
965 // position relative to the other arguments in the argument list, are 826 // position relative to the other arguments in the argument list, are
966 // passed in registers xmm0 - xmm3. 827 // passed in registers xmm0 - xmm3.
967 unsigned NumXmmArgs = 0; 828 unsigned NumXmmArgs = 0;
968 829
969 Context.init(Func->getEntryNode()); 830 Context.init(Func->getEntryNode());
970 Context.setInsertPoint(Context.getCur()); 831 Context.setInsertPoint(Context.getCur());
971 832
972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; 833 for (SizeT I = 0, E = Args.size();
973 ++I) { 834 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
974 Variable *Arg = Args[I]; 835 Variable *Arg = Args[I];
975 Type Ty = Arg->getType(); 836 Type Ty = Arg->getType();
976 if (!isVectorType(Ty)) 837 if (!isVectorType(Ty))
977 continue; 838 continue;
978 // Replace Arg in the argument list with the home register. Then 839 // Replace Arg in the argument list with the home register. Then
979 // generate an instruction in the prolog to copy the home register 840 // generate an instruction in the prolog to copy the home register
980 // to the assigned location of Arg. 841 // to the assigned location of Arg.
981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; 842 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
982 ++NumXmmArgs; 843 ++NumXmmArgs;
983 Variable *RegisterArg = Func->makeVariable(Ty); 844 Variable *RegisterArg = Func->makeVariable(Ty);
(...skipping 10 matching lines...) Expand all
994 855
995 // Helper function for addProlog(). 856 // Helper function for addProlog().
996 // 857 //
997 // This assumes Arg is an argument passed on the stack. This sets the 858 // This assumes Arg is an argument passed on the stack. This sets the
998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 859 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
999 // width. For an I64 arg that has been split into Lo and Hi components, 860 // width. For an I64 arg that has been split into Lo and Hi components,
1000 // it calls itself recursively on the components, taking care to handle 861 // it calls itself recursively on the components, taking care to handle
1001 // Lo first because of the little-endian architecture. Lastly, this 862 // Lo first because of the little-endian architecture. Lastly, this
1002 // function generates an instruction to copy Arg into its assigned 863 // function generates an instruction to copy Arg into its assigned
1003 // register if applicable. 864 // register if applicable.
1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 865 template <class Machine>
1005 size_t BasicFrameOffset, 866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
1006 size_t &InArgsSizeBytes) { 867 Variable *FramePtr,
868 size_t BasicFrameOffset,
869 size_t &InArgsSizeBytes) {
1007 Variable *Lo = Arg->getLo(); 870 Variable *Lo = Arg->getLo();
1008 Variable *Hi = Arg->getHi(); 871 Variable *Hi = Arg->getHi();
1009 Type Ty = Arg->getType(); 872 Type Ty = Arg->getType();
1010 if (Lo && Hi && Ty == IceType_i64) { 873 if (Lo && Hi && Ty == IceType_i64) {
1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 875 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 876 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 877 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1015 return; 878 return;
1016 } 879 }
1017 if (isVectorType(Ty)) { 880 if (isVectorType(Ty)) {
1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 881 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
1019 } 882 }
1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 883 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 884 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1022 if (Arg->hasReg()) { 885 if (Arg->hasReg()) {
1023 assert(Ty != IceType_i64); 886 assert(Ty != IceType_i64);
1024 OperandX8632Mem *Mem = OperandX8632Mem::create( 887 OperandX8632Mem *Mem = OperandX8632Mem::create(
1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 888 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
1026 if (isVectorType(Arg->getType())) { 889 if (isVectorType(Arg->getType())) {
1027 _movp(Arg, Mem); 890 _movp(Arg, Mem);
1028 } else { 891 } else {
1029 _mov(Arg, Mem); 892 _mov(Arg, Mem);
1030 } 893 }
1031 // This argument-copying instruction uses an explicit 894 // This argument-copying instruction uses an explicit
1032 // OperandX8632Mem operand instead of a Variable, so its 895 // OperandX8632Mem operand instead of a Variable, so its
1033 // fill-from-stack operation has to be tracked separately for 896 // fill-from-stack operation has to be tracked separately for
1034 // statistics. 897 // statistics.
1035 Ctx->statsUpdateFills(); 898 Ctx->statsUpdateFills();
1036 } 899 }
1037 } 900 }
1038 901
1039 Type TargetX8632::stackSlotType() { return IceType_i32; } 902 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
903 return IceType_i32;
904 }
1040 905
1041 void TargetX8632::addProlog(CfgNode *Node) { 906 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
1042 // Stack frame layout: 907 // Stack frame layout:
1043 // 908 //
1044 // +------------------------+ 909 // +------------------------+
1045 // | 1. return address | 910 // | 1. return address |
1046 // +------------------------+ 911 // +------------------------+
1047 // | 2. preserved registers | 912 // | 2. preserved registers |
1048 // +------------------------+ 913 // +------------------------+
1049 // | 3. padding | 914 // | 3. padding |
1050 // +------------------------+ 915 // +------------------------+
1051 // | 4. global spill area | 916 // | 4. global spill area |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
1140 _mov(ebp, esp); 1005 _mov(ebp, esp);
1141 // Keep ebp live for late-stage liveness analysis 1006 // Keep ebp live for late-stage liveness analysis
1142 // (e.g. asm-verbose mode). 1007 // (e.g. asm-verbose mode).
1143 Context.insert(InstFakeUse::create(Func, ebp)); 1008 Context.insert(InstFakeUse::create(Func, ebp));
1144 } 1009 }
1145 1010
1146 // Align the variables area. SpillAreaPaddingBytes is the size of 1011 // Align the variables area. SpillAreaPaddingBytes is the size of
1147 // the region after the preserved registers and before the spill areas. 1012 // the region after the preserved registers and before the spill areas.
1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals 1013 // LocalsSlotsPaddingBytes is the amount of padding between the globals
1149 // and locals area if they are separate. 1014 // and locals area if they are separate.
1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); 1015 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 1016 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1152 uint32_t SpillAreaPaddingBytes = 0; 1017 uint32_t SpillAreaPaddingBytes = 0;
1153 uint32_t LocalsSlotsPaddingBytes = 0; 1018 uint32_t LocalsSlotsPaddingBytes = 0;
1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, 1019 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
1155 SpillAreaAlignmentBytes, GlobalsSize, 1020 SpillAreaAlignmentBytes, GlobalsSize,
1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, 1021 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
1157 &LocalsSlotsPaddingBytes); 1022 &LocalsSlotsPaddingBytes);
1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 1023 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1159 uint32_t GlobalsAndSubsequentPaddingSize = 1024 uint32_t GlobalsAndSubsequentPaddingSize =
1160 GlobalsSize + LocalsSlotsPaddingBytes; 1025 GlobalsSize + LocalsSlotsPaddingBytes;
1161 1026
1162 // Align esp if necessary. 1027 // Align esp if necessary.
1163 if (NeedsStackAlignment) { 1028 if (NeedsStackAlignment) {
1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 1029 uint32_t StackOffset =
1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1030 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
1031 uint32_t StackSize =
1032 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1166 SpillAreaSizeBytes = StackSize - StackOffset; 1033 SpillAreaSizeBytes = StackSize - StackOffset;
1167 } 1034 }
1168 1035
1169 // Generate "sub esp, SpillAreaSizeBytes" 1036 // Generate "sub esp, SpillAreaSizeBytes"
1170 if (SpillAreaSizeBytes) 1037 if (SpillAreaSizeBytes)
1171 _sub(getPhysicalRegister(RegX8632::Reg_esp), 1038 _sub(getPhysicalRegister(RegX8632::Reg_esp),
1172 Ctx->getConstantInt32(SpillAreaSizeBytes)); 1039 Ctx->getConstantInt32(SpillAreaSizeBytes));
1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 1040 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1174 1041
1175 resetStackAdjustment(); 1042 resetStackAdjustment();
1176 1043
1177 // Fill in stack offsets for stack args, and copy args into registers 1044 // Fill in stack offsets for stack args, and copy args into registers
1178 // for those that were register-allocated. Args are pushed right to 1045 // for those that were register-allocated. Args are pushed right to
1179 // left, so Arg[0] is closest to the stack/frame pointer. 1046 // left, so Arg[0] is closest to the stack/frame pointer.
1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 1047 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 1048 size_t BasicFrameOffset =
1049 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
1182 if (!IsEbpBasedFrame) 1050 if (!IsEbpBasedFrame)
1183 BasicFrameOffset += SpillAreaSizeBytes; 1051 BasicFrameOffset += SpillAreaSizeBytes;
1184 1052
1185 const VarList &Args = Func->getArgs(); 1053 const VarList &Args = Func->getArgs();
1186 size_t InArgsSizeBytes = 0; 1054 size_t InArgsSizeBytes = 0;
1187 unsigned NumXmmArgs = 0; 1055 unsigned NumXmmArgs = 0;
1188 for (Variable *Arg : Args) { 1056 for (Variable *Arg : Args) {
1189 // Skip arguments passed in registers. 1057 // Skip arguments passed in registers.
1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 1058 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
1191 ++NumXmmArgs; 1059 ++NumXmmArgs;
1192 continue; 1060 continue;
1193 } 1061 }
1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 1062 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1195 } 1063 }
1196 1064
1197 // Fill in stack offsets for locals. 1065 // Fill in stack offsets for locals.
1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1066 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 1067 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1200 IsEbpBasedFrame); 1068 IsEbpBasedFrame);
1201 // Assign stack offsets to variables that have been linked to spilled 1069 // Assign stack offsets to variables that have been linked to spilled
1202 // variables. 1070 // variables.
1203 for (Variable *Var : VariablesLinkedToSpillSlots) { 1071 for (Variable *Var : VariablesLinkedToSpillSlots) {
1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); 1072 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
1205 Var->setStackOffset(Linked->getStackOffset()); 1073 Var->setStackOffset(Linked->getStackOffset());
1206 } 1074 }
1207 this->HasComputedFrame = true; 1075 this->HasComputedFrame = true;
1208 1076
1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { 1077 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
1210 OstreamLocker L(Func->getContext()); 1078 OstreamLocker L(Func->getContext());
1211 Ostream &Str = Func->getContext()->getStrDump(); 1079 Ostream &Str = Func->getContext()->getStrDump();
1212 1080
1213 Str << "Stack layout:\n"; 1081 Str << "Stack layout:\n";
1214 uint32_t EspAdjustmentPaddingSize = 1082 uint32_t EspAdjustmentPaddingSize =
1215 SpillAreaSizeBytes - LocalsSpillAreaSize - 1083 SpillAreaSizeBytes - LocalsSpillAreaSize -
1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; 1084 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 1085 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" 1086 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 1087 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 1088 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1221 << " globals spill area = " << GlobalsSize << " bytes\n" 1089 << " globals spill area = " << GlobalsSize << " bytes\n"
1222 << " globals-locals spill areas intermediate padding = " 1090 << " globals-locals spill areas intermediate padding = "
1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 1091 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 1092 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1225 << " esp alignment padding = " << EspAdjustmentPaddingSize 1093 << " esp alignment padding = " << EspAdjustmentPaddingSize
1226 << " bytes\n"; 1094 << " bytes\n";
1227 1095
1228 Str << "Stack details:\n" 1096 Str << "Stack details:\n"
1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" 1097 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 1098 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1099 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1232 << " bytes\n" 1100 << " bytes\n"
1233 << " is ebp based = " << IsEbpBasedFrame << "\n"; 1101 << " is ebp based = " << IsEbpBasedFrame << "\n";
1234 } 1102 }
1235 } 1103 }
1236 1104
1237 void TargetX8632::addEpilog(CfgNode *Node) { 1105 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
1238 InstList &Insts = Node->getInsts(); 1106 InstList &Insts = Node->getInsts();
1239 InstList::reverse_iterator RI, E; 1107 InstList::reverse_iterator RI, E;
1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1108 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1241 if (llvm::isa<InstX8632Ret>(*RI)) 1109 if (llvm::isa<InstX8632Ret>(*RI))
1242 break; 1110 break;
1243 } 1111 }
1244 if (RI == E) 1112 if (RI == E)
1245 return; 1113 return;
1246 1114
1247 // Convert the reverse_iterator position into its corresponding 1115 // Convert the reverse_iterator position into its corresponding
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1280 1148
1281 if (!Ctx->getFlags().getUseSandboxing()) 1149 if (!Ctx->getFlags().getUseSandboxing())
1282 return; 1150 return;
1283 // Change the original ret instruction into a sandboxed return sequence. 1151 // Change the original ret instruction into a sandboxed return sequence.
1284 // t:ecx = pop 1152 // t:ecx = pop
1285 // bundle_lock 1153 // bundle_lock
1286 // and t, ~31 1154 // and t, ~31
1287 // jmp *t 1155 // jmp *t
1288 // bundle_unlock 1156 // bundle_unlock
1289 // FakeUse <original_ret_operand> 1157 // FakeUse <original_ret_operand>
1290 const SizeT BundleSize = 1 1158 const SizeT BundleSize =
1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 1159 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 1160 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
1293 _pop(T_ecx); 1161 _pop(T_ecx);
1294 _bundle_lock(); 1162 _bundle_lock();
1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); 1163 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
1296 _jmp(T_ecx); 1164 _jmp(T_ecx);
1297 _bundle_unlock(); 1165 _bundle_unlock();
1298 if (RI->getSrcSize()) { 1166 if (RI->getSrcSize()) {
1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1167 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
1300 Context.insert(InstFakeUse::create(Func, RetValue)); 1168 Context.insert(InstFakeUse::create(Func, RetValue));
1301 } 1169 }
1302 RI->setDeleted(); 1170 RI->setDeleted();
1303 } 1171 }
1304 1172
1305 void TargetX8632::split64(Variable *Var) { 1173 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
1306 switch (Var->getType()) { 1174 switch (Var->getType()) {
1307 default: 1175 default:
1308 return; 1176 return;
1309 case IceType_i64: 1177 case IceType_i64:
1310 // TODO: Only consider F64 if we need to push each half when 1178 // TODO: Only consider F64 if we need to push each half when
1311 // passing as an argument to a function call. Note that each half 1179 // passing as an argument to a function call. Note that each half
1312 // is still typed as I32. 1180 // is still typed as I32.
1313 case IceType_f64: 1181 case IceType_f64:
1314 break; 1182 break;
1315 } 1183 }
(...skipping 10 matching lines...) Expand all
1326 Lo->setName(Func, Var->getName(Func) + "__lo"); 1194 Lo->setName(Func, Var->getName(Func) + "__lo");
1327 Hi->setName(Func, Var->getName(Func) + "__hi"); 1195 Hi->setName(Func, Var->getName(Func) + "__hi");
1328 } 1196 }
1329 Var->setLoHi(Lo, Hi); 1197 Var->setLoHi(Lo, Hi);
1330 if (Var->getIsArg()) { 1198 if (Var->getIsArg()) {
1331 Lo->setIsArg(); 1199 Lo->setIsArg();
1332 Hi->setIsArg(); 1200 Hi->setIsArg();
1333 } 1201 }
1334 } 1202 }
1335 1203
1336 Operand *TargetX8632::loOperand(Operand *Operand) { 1204 template <class Machine>
1205 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
1337 assert(Operand->getType() == IceType_i64 || 1206 assert(Operand->getType() == IceType_i64 ||
1338 Operand->getType() == IceType_f64); 1207 Operand->getType() == IceType_f64);
1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1208 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1340 return Operand; 1209 return Operand;
1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1210 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1342 split64(Var); 1211 split64(Var);
1343 return Var->getLo(); 1212 return Var->getLo();
1344 } 1213 }
1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1214 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1215 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 1216 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
1348 return legalize(ConstInt); 1217 return legalize(ConstInt);
1349 } 1218 }
1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1219 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1220 OperandX8632Mem *MemOperand = OperandX8632Mem::create(
1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 1221 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
1353 Mem->getShift(), Mem->getSegmentRegister()); 1222 Mem->getShift(), Mem->getSegmentRegister());
1354 // Test if we should randomize or pool the offset; if so, randomize it or 1223 // Test if we should randomize or pool the offset; if so, randomize it or
1355 // pool it, then create the mem operand with the blinded/pooled constant. 1224 // pool it, then create the mem operand with the blinded/pooled constant.
1356 // Otherwise, return the mem operand as an ordinary mem operand. 1225 // Otherwise, return the mem operand as an ordinary mem operand.
1357 return legalize(MemOperand); 1226 return legalize(MemOperand);
1358 } 1227 }
1359 llvm_unreachable("Unsupported operand type"); 1228 llvm_unreachable("Unsupported operand type");
1360 return nullptr; 1229 return nullptr;
1361 } 1230 }
1362 1231
1363 Operand *TargetX8632::hiOperand(Operand *Operand) { 1232 template <class Machine>
1233 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
1364 assert(Operand->getType() == IceType_i64 || 1234 assert(Operand->getType() == IceType_i64 ||
1365 Operand->getType() == IceType_f64); 1235 Operand->getType() == IceType_f64);
1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1236 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1367 return Operand; 1237 return Operand;
1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1238 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1369 split64(Var); 1239 split64(Var);
1370 return Var->getHi(); 1240 return Var->getHi();
1371 } 1241 }
1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1242 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1243 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 20 matching lines...) Expand all
1394 Mem->getShift(), Mem->getSegmentRegister()); 1264 Mem->getShift(), Mem->getSegmentRegister());
1395 // Test if the Offset is an eligible i32 constant for randomization and 1265 // Test if the Offset is an eligible i32 constant for randomization and
1396 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem 1266 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem
1397 // operand. 1267 // operand.
1398 return legalize(MemOperand); 1268 return legalize(MemOperand);
1399 } 1269 }
1400 llvm_unreachable("Unsupported operand type"); 1270 llvm_unreachable("Unsupported operand type");
1401 return nullptr; 1271 return nullptr;
1402 } 1272 }
1403 1273
1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1274 template <class Machine>
1405 RegSetMask Exclude) const { 1275 llvm::SmallBitVector
1276 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
1277 RegSetMask Exclude) const {
1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); 1278 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
1407 1279
1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 1280 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1409 frameptr, isI8, isInt, isFP) \ 1281 frameptr, isI8, isInt, isFP) \
1410 if (scratch && (Include & RegSet_CallerSave)) \ 1282 if (scratch && (Include & RegSet_CallerSave)) \
1411 Registers[RegX8632::val] = true; \ 1283 Registers[RegX8632::val] = true; \
1412 if (preserved && (Include & RegSet_CalleeSave)) \ 1284 if (preserved && (Include & RegSet_CalleeSave)) \
1413 Registers[RegX8632::val] = true; \ 1285 Registers[RegX8632::val] = true; \
1414 if (stackptr && (Include & RegSet_StackPointer)) \ 1286 if (stackptr && (Include & RegSet_StackPointer)) \
1415 Registers[RegX8632::val] = true; \ 1287 Registers[RegX8632::val] = true; \
1416 if (frameptr && (Include & RegSet_FramePointer)) \ 1288 if (frameptr && (Include & RegSet_FramePointer)) \
1417 Registers[RegX8632::val] = true; \ 1289 Registers[RegX8632::val] = true; \
1418 if (scratch && (Exclude & RegSet_CallerSave)) \ 1290 if (scratch && (Exclude & RegSet_CallerSave)) \
1419 Registers[RegX8632::val] = false; \ 1291 Registers[RegX8632::val] = false; \
1420 if (preserved && (Exclude & RegSet_CalleeSave)) \ 1292 if (preserved && (Exclude & RegSet_CalleeSave)) \
1421 Registers[RegX8632::val] = false; \ 1293 Registers[RegX8632::val] = false; \
1422 if (stackptr && (Exclude & RegSet_StackPointer)) \ 1294 if (stackptr && (Exclude & RegSet_StackPointer)) \
1423 Registers[RegX8632::val] = false; \ 1295 Registers[RegX8632::val] = false; \
1424 if (frameptr && (Exclude & RegSet_FramePointer)) \ 1296 if (frameptr && (Exclude & RegSet_FramePointer)) \
1425 Registers[RegX8632::val] = false; 1297 Registers[RegX8632::val] = false;
1426 1298
1427 REGX8632_TABLE 1299 REGX8632_TABLE
1428 1300
1429 #undef X 1301 #undef X
1430 1302
1431 return Registers; 1303 return Registers;
1432 } 1304 }
1433 1305
1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { 1306 template <class Machine>
1307 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
1435 IsEbpBasedFrame = true; 1308 IsEbpBasedFrame = true;
1436 // Conservatively require the stack to be aligned. Some stack 1309 // Conservatively require the stack to be aligned. Some stack
1437 // adjustment operations implemented below assume that the stack is 1310 // adjustment operations implemented below assume that the stack is
1438 // aligned before the alloca. All the alloca code ensures that the 1311 // aligned before the alloca. All the alloca code ensures that the
1439 // stack alignment is preserved after the alloca. The stack alignment 1312 // stack alignment is preserved after the alloca. The stack alignment
1440 // restriction can be relaxed in some cases. 1313 // restriction can be relaxed in some cases.
1441 NeedsStackAlignment = true; 1314 NeedsStackAlignment = true;
1442 1315
1443 // TODO(stichnot): minimize the number of adjustments of esp, etc. 1316 // TODO(stichnot): minimize the number of adjustments of esp, etc.
1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 1317 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
1445 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1318 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1446 Variable *Dest = Inst->getDest(); 1319 Variable *Dest = Inst->getDest();
1447 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1320 uint32_t AlignmentParam = Inst->getAlignInBytes();
1448 // For default align=0, set it to the real value 1, to avoid any 1321 // For default align=0, set it to the real value 1, to avoid any
1449 // bit-manipulation problems below. 1322 // bit-manipulation problems below.
1450 AlignmentParam = std::max(AlignmentParam, 1u); 1323 AlignmentParam = std::max(AlignmentParam, 1u);
1451 1324
1452 // LLVM enforces power of 2 alignment. 1325 // LLVM enforces power of 2 alignment.
1453 assert(llvm::isPowerOf2_32(AlignmentParam)); 1326 assert(llvm::isPowerOf2_32(AlignmentParam));
1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES)); 1327 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
1455 1328
1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1329 uint32_t Alignment =
1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1330 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
1331 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
1458 _and(esp, Ctx->getConstantInt32(-Alignment)); 1332 _and(esp, Ctx->getConstantInt32(-Alignment));
1459 } 1333 }
1460 if (const auto *ConstantTotalSize = 1334 if (const auto *ConstantTotalSize =
1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1335 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1462 uint32_t Value = ConstantTotalSize->getValue(); 1336 uint32_t Value = ConstantTotalSize->getValue();
1463 Value = Utils::applyAlignment(Value, Alignment); 1337 Value = Utils::applyAlignment(Value, Alignment);
1464 _sub(esp, Ctx->getConstantInt32(Value)); 1338 _sub(esp, Ctx->getConstantInt32(Value));
1465 } else { 1339 } else {
1466 // Non-constant sizes need to be adjusted to the next highest 1340 // Non-constant sizes need to be adjusted to the next highest
1467 // multiple of the required alignment at runtime. 1341 // multiple of the required alignment at runtime.
1468 Variable *T = makeReg(IceType_i32); 1342 Variable *T = makeReg(IceType_i32);
1469 _mov(T, TotalSize); 1343 _mov(T, TotalSize);
1470 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1344 _add(T, Ctx->getConstantInt32(Alignment - 1));
1471 _and(T, Ctx->getConstantInt32(-Alignment)); 1345 _and(T, Ctx->getConstantInt32(-Alignment));
1472 _sub(esp, T); 1346 _sub(esp, T);
1473 } 1347 }
1474 _mov(Dest, esp); 1348 _mov(Dest, esp);
1475 } 1349 }
1476 1350
1477 // Strength-reduce scalar integer multiplication by a constant (for 1351 // Strength-reduce scalar integer multiplication by a constant (for
1478 // i32 or narrower) for certain constants. The lea instruction can be 1352 // i32 or narrower) for certain constants. The lea instruction can be
1479 // used to multiply by 3, 5, or 9, and the shl instruction can be used 1353 // used to multiply by 3, 5, or 9, and the shl instruction can be used
1480 // to multiply by powers of 2. These can be combined such that 1354 // to multiply by powers of 2. These can be combined such that
1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, 1355 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1482 // combined with left-shifting by 2. 1356 // combined with left-shifting by 2.
1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, 1357 template <class Machine>
1484 int32_t Src1) { 1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1359 int32_t Src1) {
1485 // Disable this optimization for Om1 and O0, just to keep things 1360 // Disable this optimization for Om1 and O0, just to keep things
1486 // simple there. 1361 // simple there.
1487 if (Ctx->getFlags().getOptLevel() < Opt_1) 1362 if (Ctx->getFlags().getOptLevel() < Opt_1)
1488 return false; 1363 return false;
1489 Type Ty = Dest->getType(); 1364 Type Ty = Dest->getType();
1490 Variable *T = nullptr; 1365 Variable *T = nullptr;
1491 if (Src1 == -1) { 1366 if (Src1 == -1) {
1492 _mov(T, Src0); 1367 _mov(T, Src0);
1493 _neg(T); 1368 _neg(T);
1494 _mov(Dest, T); 1369 _mov(Dest, T);
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1563 } 1438 }
1564 if (Count2) { 1439 if (Count2) {
1565 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1440 _shl(T, Ctx->getConstantInt(Ty, Count2));
1566 } 1441 }
1567 if (Src1IsNegative) 1442 if (Src1IsNegative)
1568 _neg(T); 1443 _neg(T);
1569 _mov(Dest, T); 1444 _mov(Dest, T);
1570 return true; 1445 return true;
1571 } 1446 }
1572 1447
1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1448 template <class Machine>
1449 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1574 Variable *Dest = Inst->getDest(); 1450 Variable *Dest = Inst->getDest();
1575 Operand *Src0 = legalize(Inst->getSrc(0)); 1451 Operand *Src0 = legalize(Inst->getSrc(0));
1576 Operand *Src1 = legalize(Inst->getSrc(1)); 1452 Operand *Src1 = legalize(Inst->getSrc(1));
1577 if (Inst->isCommutative()) { 1453 if (Inst->isCommutative()) {
1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1454 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1579 std::swap(Src0, Src1); 1455 std::swap(Src0, Src1);
1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1456 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1581 std::swap(Src0, Src1); 1457 std::swap(Src0, Src1);
1582 } 1458 }
1583 if (Dest->getType() == IceType_i64) { 1459 if (Dest->getType() == IceType_i64) {
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after
1866 case InstArithmetic::Sub: { 1742 case InstArithmetic::Sub: {
1867 Variable *T = makeReg(Dest->getType()); 1743 Variable *T = makeReg(Dest->getType());
1868 _movp(T, Src0); 1744 _movp(T, Src0);
1869 _psub(T, Src1); 1745 _psub(T, Src1);
1870 _movp(Dest, T); 1746 _movp(Dest, T);
1871 } break; 1747 } break;
1872 case InstArithmetic::Mul: { 1748 case InstArithmetic::Mul: {
1873 bool TypesAreValidForPmull = 1749 bool TypesAreValidForPmull =
1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1750 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1875 bool InstructionSetIsValidForPmull = 1751 bool InstructionSetIsValidForPmull =
1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1752 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1;
1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1753 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1878 Variable *T = makeReg(Dest->getType()); 1754 Variable *T = makeReg(Dest->getType());
1879 _movp(T, Src0); 1755 _movp(T, Src0);
1880 _pmull(T, Src1); 1756 _pmull(T, Src1);
1881 _movp(Dest, T); 1757 _movp(Dest, T);
1882 } else if (Dest->getType() == IceType_v4i32) { 1758 } else if (Dest->getType() == IceType_v4i32) {
1883 // Lowering sequence: 1759 // Lowering sequence:
1884 // Note: The mask arguments have index 0 on the left. 1760 // Note: The mask arguments have index 0 on the left.
1885 // 1761 //
1886 // movups T1, Src0 1762 // movups T1, Src0
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1936 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2061 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1937 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2062 Type Ty = Dest->getType(); 1938 Type Ty = Dest->getType();
2063 // LLVM does the following for dest=src/(1<<log): 1939 // LLVM does the following for dest=src/(1<<log):
2064 // t=src 1940 // t=src
2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1941 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2066 // shr t,typewidth-log 1942 // shr t,typewidth-log
2067 // add t,src 1943 // add t,src
2068 // sar t,log 1944 // sar t,log
2069 // dest=t 1945 // dest=t
2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); 1946 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2071 _mov(T, Src0); 1947 _mov(T, Src0);
2072 // If for some reason we are dividing by 1, just treat it 1948 // If for some reason we are dividing by 1, just treat it
2073 // like an assignment. 1949 // like an assignment.
2074 if (LogDiv > 0) { 1950 if (LogDiv > 0) {
2075 // The initial sar is unnecessary when dividing by 2. 1951 // The initial sar is unnecessary when dividing by 2.
2076 if (LogDiv > 1) 1952 if (LogDiv > 1)
2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 1953 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1954 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2079 _add(T, Src0); 1955 _add(T, Src0);
2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1956 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
2129 Type Ty = Dest->getType(); 2005 Type Ty = Dest->getType();
2130 // LLVM does the following for dest=src%(1<<log): 2006 // LLVM does the following for dest=src%(1<<log):
2131 // t=src 2007 // t=src
2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not 2008 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2133 // shr t,typewidth-log 2009 // shr t,typewidth-log
2134 // add t,src 2010 // add t,src
2135 // and t, -(1<<log) 2011 // and t, -(1<<log)
2136 // sub t,src 2012 // sub t,src
2137 // neg t 2013 // neg t
2138 // dest=t 2014 // dest=t
2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); 2015 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2140 // If for some reason we are dividing by 1, just assign 0. 2016 // If for some reason we are dividing by 1, just assign 0.
2141 if (LogDiv == 0) { 2017 if (LogDiv == 0) {
2142 _mov(Dest, Ctx->getConstantZero(Ty)); 2018 _mov(Dest, Ctx->getConstantZero(Ty));
2143 return; 2019 return;
2144 } 2020 }
2145 _mov(T, Src0); 2021 _mov(T, Src0);
2146 // The initial sar is unnecessary when dividing by 2. 2022 // The initial sar is unnecessary when dividing by 2.
2147 if (LogDiv > 1) 2023 if (LogDiv > 1)
2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 2024 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 2025 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
2197 Type Ty = Dest->getType(); 2073 Type Ty = Dest->getType();
2198 InstCall *Call = makeHelperCall( 2074 InstCall *Call = makeHelperCall(
2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2075 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
2200 Call->addArg(Src0); 2076 Call->addArg(Src0);
2201 Call->addArg(Src1); 2077 Call->addArg(Src1);
2202 return lowerCall(Call); 2078 return lowerCall(Call);
2203 } 2079 }
2204 } 2080 }
2205 } 2081 }
2206 2082
2207 void TargetX8632::lowerAssign(const InstAssign *Inst) { 2083 template <class Machine>
2084 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
2208 Variable *Dest = Inst->getDest(); 2085 Variable *Dest = Inst->getDest();
2209 Operand *Src0 = Inst->getSrc(0); 2086 Operand *Src0 = Inst->getSrc(0);
2210 assert(Dest->getType() == Src0->getType()); 2087 assert(Dest->getType() == Src0->getType());
2211 if (Dest->getType() == IceType_i64) { 2088 if (Dest->getType() == IceType_i64) {
2212 Src0 = legalize(Src0); 2089 Src0 = legalize(Src0);
2213 Operand *Src0Lo = loOperand(Src0); 2090 Operand *Src0Lo = loOperand(Src0);
2214 Operand *Src0Hi = hiOperand(Src0); 2091 Operand *Src0Hi = hiOperand(Src0);
2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2217 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2094 Variable *T_Lo = nullptr, *T_Hi = nullptr;
(...skipping 24 matching lines...) Expand all
2242 // register or a scalar integer immediate. 2119 // register or a scalar integer immediate.
2243 RI = legalize(Src0, Legal_Reg | Legal_Imm); 2120 RI = legalize(Src0, Legal_Reg | Legal_Imm);
2244 } 2121 }
2245 if (isVectorType(Dest->getType())) 2122 if (isVectorType(Dest->getType()))
2246 _movp(Dest, RI); 2123 _movp(Dest, RI);
2247 else 2124 else
2248 _mov(Dest, RI); 2125 _mov(Dest, RI);
2249 } 2126 }
2250 } 2127 }
2251 2128
2252 void TargetX8632::lowerBr(const InstBr *Inst) { 2129 template <class Machine>
2130 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
2253 if (Inst->isUnconditional()) { 2131 if (Inst->isUnconditional()) {
2254 _br(Inst->getTargetUnconditional()); 2132 _br(Inst->getTargetUnconditional());
2255 return; 2133 return;
2256 } 2134 }
2257 Operand *Cond = Inst->getCondition(); 2135 Operand *Cond = Inst->getCondition();
2258 2136
2259 // Handle folding opportunities. 2137 // Handle folding opportunities.
2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 2138 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
2261 assert(Producer->isDeleted()); 2139 assert(Producer->isDeleted());
2262 switch (BoolFolding::getProducerKind(Producer)) { 2140 switch (BoolFolding::getProducerKind(Producer)) {
2263 default: 2141 default:
2264 break; 2142 break;
2265 case BoolFolding::PK_Icmp32: { 2143 case BoolFolding::PK_Icmp32: {
2266 // TODO(stichnot): Refactor similarities between this block and 2144 // TODO(stichnot): Refactor similarities between this block and
2267 // the corresponding code in lowerIcmp(). 2145 // the corresponding code in lowerIcmp().
2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 2146 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
2269 Operand *Src0 = Producer->getSrc(0); 2147 Operand *Src0 = Producer->getSrc(0);
2270 Operand *Src1 = legalize(Producer->getSrc(1)); 2148 Operand *Src1 = legalize(Producer->getSrc(1));
2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2149 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2272 _cmp(Src0RM, Src1); 2150 _cmp(Src0RM, Src1);
2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), 2151 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
2274 Inst->getTargetFalse()); 2152 Inst->getTargetFalse());
2275 return; 2153 return;
2276 } 2154 }
2277 } 2155 }
2278 } 2156 }
2279 2157
2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2158 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2281 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2159 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2282 _cmp(Src0, Zero); 2160 _cmp(Src0, Zero);
2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 2161 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
2284 } 2162 }
2285 2163
2286 void TargetX8632::lowerCall(const InstCall *Instr) { 2164 template <class Machine>
2165 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
2287 // x86-32 calling convention: 2166 // x86-32 calling convention:
2288 // 2167 //
2289 // * At the point before the call, the stack must be aligned to 16 2168 // * At the point before the call, the stack must be aligned to 16
2290 // bytes. 2169 // bytes.
2291 // 2170 //
2292 // * The first four arguments of vector type, regardless of their 2171 // * The first four arguments of vector type, regardless of their
2293 // position relative to the other arguments in the argument list, are 2172 // position relative to the other arguments in the argument list, are
2294 // placed in registers xmm0 - xmm3. 2173 // placed in registers xmm0 - xmm3.
2295 // 2174 //
2296 // * Other arguments are pushed onto the stack in right-to-left order, 2175 // * Other arguments are pushed onto the stack in right-to-left order,
(...skipping 14 matching lines...) Expand all
2311 OperandList StackArgs, StackArgLocations; 2190 OperandList StackArgs, StackArgLocations;
2312 uint32_t ParameterAreaSizeBytes = 0; 2191 uint32_t ParameterAreaSizeBytes = 0;
2313 2192
2314 // Classify each argument operand according to the location where the 2193 // Classify each argument operand according to the location where the
2315 // argument is passed. 2194 // argument is passed.
2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { 2195 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
2317 Operand *Arg = Instr->getArg(i); 2196 Operand *Arg = Instr->getArg(i);
2318 Type Ty = Arg->getType(); 2197 Type Ty = Arg->getType();
2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 2198 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2320 assert(typeWidthInBytes(Ty) >= 4); 2199 assert(typeWidthInBytes(Ty) >= 4);
2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 2200 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
2322 XmmArgs.push_back(Arg); 2201 XmmArgs.push_back(Arg);
2323 } else { 2202 } else {
2324 StackArgs.push_back(Arg); 2203 StackArgs.push_back(Arg);
2325 if (isVectorType(Arg->getType())) { 2204 if (isVectorType(Arg->getType())) {
2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 2205 ParameterAreaSizeBytes =
2206 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2327 } 2207 }
2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 2208 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2332 } 2212 }
2333 } 2213 }
2334 2214
2335 // Adjust the parameter area so that the stack is aligned. It is 2215 // Adjust the parameter area so that the stack is aligned. It is
2336 // assumed that the stack is already aligned at the start of the 2216 // assumed that the stack is already aligned at the start of the
2337 // calling sequence. 2217 // calling sequence.
2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2339 2219
2340 // Subtract the appropriate amount for the argument area. This also 2220 // Subtract the appropriate amount for the argument area. This also
2341 // takes care of setting the stack adjustment during emission. 2221 // takes care of setting the stack adjustment during emission.
2342 // 2222 //
2343 // TODO: If for some reason the call instruction gets dead-code 2223 // TODO: If for some reason the call instruction gets dead-code
2344 // eliminated after lowering, we would need to ensure that the 2224 // eliminated after lowering, we would need to ensure that the
2345 // pre-call and the post-call esp adjustment get eliminated as well. 2225 // pre-call and the post-call esp adjustment get eliminated as well.
2346 if (ParameterAreaSizeBytes) { 2226 if (ParameterAreaSizeBytes) {
2347 _adjust_stack(ParameterAreaSizeBytes); 2227 _adjust_stack(ParameterAreaSizeBytes);
2348 } 2228 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
2411 Operand *CallTarget = legalize(Instr->getCallTarget()); 2291 Operand *CallTarget = legalize(Instr->getCallTarget());
2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 2292 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
2413 if (NeedSandboxing) { 2293 if (NeedSandboxing) {
2414 if (llvm::isa<Constant>(CallTarget)) { 2294 if (llvm::isa<Constant>(CallTarget)) {
2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2295 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2416 } else { 2296 } else {
2417 Variable *CallTargetVar = nullptr; 2297 Variable *CallTargetVar = nullptr;
2418 _mov(CallTargetVar, CallTarget); 2298 _mov(CallTargetVar, CallTarget);
2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2299 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2420 const SizeT BundleSize = 2300 const SizeT BundleSize =
2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 2301 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); 2302 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2423 CallTarget = CallTargetVar; 2303 CallTarget = CallTargetVar;
2424 } 2304 }
2425 } 2305 }
2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 2306 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
2427 Context.insert(NewCall); 2307 Context.insert(NewCall);
2428 if (NeedSandboxing) 2308 if (NeedSandboxing)
2429 _bundle_unlock(); 2309 _bundle_unlock();
2430 if (ReturnRegHi) 2310 if (ReturnRegHi)
2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 2311 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 // st(0). 2353 // st(0).
2474 // If Dest ends up being a physical xmm register, the fstp emit code 2354 // If Dest ends up being a physical xmm register, the fstp emit code
2475 // will route st(0) through a temporary stack slot. 2355 // will route st(0) through a temporary stack slot.
2476 _fstp(Dest); 2356 _fstp(Dest);
2477 // Create a fake use of Dest in case it actually isn't used, 2357 // Create a fake use of Dest in case it actually isn't used,
2478 // because st(0) still needs to be popped. 2358 // because st(0) still needs to be popped.
2479 Context.insert(InstFakeUse::create(Func, Dest)); 2359 Context.insert(InstFakeUse::create(Func, Dest));
2480 } 2360 }
2481 } 2361 }
2482 2362
2483 void TargetX8632::lowerCast(const InstCast *Inst) { 2363 template <class Machine>
2364 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 2365 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2485 InstCast::OpKind CastKind = Inst->getCastKind(); 2366 InstCast::OpKind CastKind = Inst->getCastKind();
2486 Variable *Dest = Inst->getDest(); 2367 Variable *Dest = Inst->getDest();
2487 switch (CastKind) { 2368 switch (CastKind) {
2488 default: 2369 default:
2489 Func->setError("Cast type not supported"); 2370 Func->setError("Cast type not supported");
2490 return; 2371 return;
2491 case InstCast::Sext: { 2372 case InstCast::Sext: {
2492 // Src0RM is the source operand legalized to physical register or memory, 2373 // Src0RM is the source operand legalized to physical register or memory,
2493 // but not immediate, since the relevant x86 native instructions don't 2374 // but not immediate, since the relevant x86 native instructions don't
2494 // allow an immediate operand. If the operand is an immediate, we could 2375 // allow an immediate operand. If the operand is an immediate, we could
2495 // consider computing the strength-reduced result at translation time, 2376 // consider computing the strength-reduced result at translation time,
2496 // but we're unlikely to see something like that in the bitcode that 2377 // but we're unlikely to see something like that in the bitcode that
2497 // the optimizer wouldn't have already taken care of. 2378 // the optimizer wouldn't have already taken care of.
2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2379 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2499 if (isVectorType(Dest->getType())) { 2380 if (isVectorType(Dest->getType())) {
2500 Type DestTy = Dest->getType(); 2381 Type DestTy = Dest->getType();
2501 if (DestTy == IceType_v16i8) { 2382 if (DestTy == IceType_v16i8) {
2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 2383 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2503 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 2384 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2504 Variable *T = makeReg(DestTy); 2385 Variable *T = makeReg(DestTy);
2505 _movp(T, Src0RM); 2386 _movp(T, Src0RM);
2506 _pand(T, OneMask); 2387 _pand(T, OneMask);
2507 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 2388 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2508 _pcmpgt(T, Zeros); 2389 _pcmpgt(T, Zeros);
2509 _movp(Dest, T); 2390 _movp(Dest, T);
2510 } else { 2391 } else {
2511 // width = width(elty) - 1; dest = (src << width) >> width 2392 // width = width(elty) - 1; dest = (src << width) >> width
2512 SizeT ShiftAmount = 2393 SizeT ShiftAmount =
2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 2394 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2395 1;
2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 2396 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2515 Variable *T = makeReg(DestTy); 2397 Variable *T = makeReg(DestTy);
2516 _movp(T, Src0RM); 2398 _movp(T, Src0RM);
2517 _psll(T, ShiftConstant); 2399 _psll(T, ShiftConstant);
2518 _psra(T, ShiftConstant); 2400 _psra(T, ShiftConstant);
2519 _movp(Dest, T); 2401 _movp(Dest, T);
2520 } 2402 }
2521 } else if (Dest->getType() == IceType_i64) { 2403 } else if (Dest->getType() == IceType_i64) {
2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 2404 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2523 Constant *Shift = Ctx->getConstantInt32(31); 2405 Constant *Shift = Ctx->getConstantInt32(31);
(...skipping 14 matching lines...) Expand all
2538 _mov(T_Hi, T_Lo); 2420 _mov(T_Hi, T_Lo);
2539 if (Src0RM->getType() != IceType_i1) 2421 if (Src0RM->getType() != IceType_i1)
2540 // For i1, the sar instruction is already done above. 2422 // For i1, the sar instruction is already done above.
2541 _sar(T_Hi, Shift); 2423 _sar(T_Hi, Shift);
2542 _mov(DestHi, T_Hi); 2424 _mov(DestHi, T_Hi);
2543 } else if (Src0RM->getType() == IceType_i1) { 2425 } else if (Src0RM->getType() == IceType_i1) {
2544 // t1 = src 2426 // t1 = src
2545 // shl t1, dst_bitwidth - 1 2427 // shl t1, dst_bitwidth - 1
2546 // sar t1, dst_bitwidth - 1 2428 // sar t1, dst_bitwidth - 1
2547 // dst = t1 2429 // dst = t1
2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 2430 size_t DestBits =
2431 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); 2432 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2550 Variable *T = makeReg(Dest->getType()); 2433 Variable *T = makeReg(Dest->getType());
2551 if (typeWidthInBytes(Dest->getType()) <= 2434 if (typeWidthInBytes(Dest->getType()) <=
2552 typeWidthInBytes(Src0RM->getType())) { 2435 typeWidthInBytes(Src0RM->getType())) {
2553 _mov(T, Src0RM); 2436 _mov(T, Src0RM);
2554 } else { 2437 } else {
2555 // Widen the source using movsx or movzx. (It doesn't matter 2438 // Widen the source using movsx or movzx. (It doesn't matter
2556 // which one, since the following shl/sar overwrite the bits.) 2439 // which one, since the following shl/sar overwrite the bits.)
2557 _movzx(T, Src0RM); 2440 _movzx(T, Src0RM);
2558 } 2441 }
(...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after
2943 case IceType_v4i32: 2826 case IceType_v4i32:
2944 case IceType_v4f32: { 2827 case IceType_v4f32: {
2945 _movp(Dest, legalizeToVar(Src0)); 2828 _movp(Dest, legalizeToVar(Src0));
2946 } break; 2829 } break;
2947 } 2830 }
2948 break; 2831 break;
2949 } 2832 }
2950 } 2833 }
2951 } 2834 }
2952 2835
2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { 2836 template <class Machine>
2837 void TargetX86Base<Machine>::lowerExtractElement(
2838 const InstExtractElement *Inst) {
2954 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2839 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2955 ConstantInteger32 *ElementIndex = 2840 ConstantInteger32 *ElementIndex =
2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2841 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2957 // Only constant indices are allowed in PNaCl IR. 2842 // Only constant indices are allowed in PNaCl IR.
2958 assert(ElementIndex); 2843 assert(ElementIndex);
2959 2844
2960 unsigned Index = ElementIndex->getValue(); 2845 unsigned Index = ElementIndex->getValue();
2961 Type Ty = SourceVectNotLegalized->getType(); 2846 Type Ty = SourceVectNotLegalized->getType();
2962 Type ElementTy = typeElementType(Ty); 2847 Type ElementTy = typeElementType(Ty);
2963 Type InVectorElementTy = getInVectorElementType(Ty); 2848 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2964 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2849 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2965 2850
2966 // TODO(wala): Determine the best lowering sequences for each type. 2851 // TODO(wala): Determine the best lowering sequences for each type.
2967 bool CanUsePextr = 2852 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2853 InstructionSet >= Machine::SSE4_1;
2969 if (CanUsePextr && Ty != IceType_v4f32) { 2854 if (CanUsePextr && Ty != IceType_v4f32) {
2970 // Use pextrb, pextrw, or pextrd. 2855 // Use pextrb, pextrw, or pextrd.
2971 Constant *Mask = Ctx->getConstantInt32(Index); 2856 Constant *Mask = Ctx->getConstantInt32(Index);
2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2857 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2973 _pextr(ExtractedElementR, SourceVectR, Mask); 2858 _pextr(ExtractedElementR, SourceVectR, Mask);
2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2859 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2975 // Use pshufd and movd/movss. 2860 // Use pshufd and movd/movss.
2976 Variable *T = nullptr; 2861 Variable *T = nullptr;
2977 if (Index) { 2862 if (Index) {
2978 // The shuffle only needs to occur if the element to be extracted 2863 // The shuffle only needs to occur if the element to be extracted
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); 2904 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
3020 lowerCast(Cast); 2905 lowerCast(Cast);
3021 ExtractedElementR = T; 2906 ExtractedElementR = T;
3022 } 2907 }
3023 2908
3024 // Copy the element to the destination. 2909 // Copy the element to the destination.
3025 Variable *Dest = Inst->getDest(); 2910 Variable *Dest = Inst->getDest();
3026 _mov(Dest, ExtractedElementR); 2911 _mov(Dest, ExtractedElementR);
3027 } 2912 }
3028 2913
3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2914 template <class Machine>
2915 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
3030 Operand *Src0 = Inst->getSrc(0); 2916 Operand *Src0 = Inst->getSrc(0);
3031 Operand *Src1 = Inst->getSrc(1); 2917 Operand *Src1 = Inst->getSrc(1);
3032 Variable *Dest = Inst->getDest(); 2918 Variable *Dest = Inst->getDest();
3033 2919
3034 if (isVectorType(Dest->getType())) { 2920 if (isVectorType(Dest->getType())) {
3035 InstFcmp::FCond Condition = Inst->getCondition(); 2921 InstFcmp::FCond Condition = Inst->getCondition();
3036 size_t Index = static_cast<size_t>(Condition); 2922 size_t Index = static_cast<size_t>(Condition);
3037 assert(Index < TableFcmpSize); 2923 assert(Index < Traits::TableFcmpSize);
3038 2924
3039 if (TableFcmp[Index].SwapVectorOperands) { 2925 if (Traits::TableFcmp[Index].SwapVectorOperands) {
3040 Operand *T = Src0; 2926 Operand *T = Src0;
3041 Src0 = Src1; 2927 Src0 = Src1;
3042 Src1 = T; 2928 Src1 = T;
3043 } 2929 }
3044 2930
3045 Variable *T = nullptr; 2931 Variable *T = nullptr;
3046 2932
3047 if (Condition == InstFcmp::True) { 2933 if (Condition == InstFcmp::True) {
3048 // makeVectorOfOnes() requires an integer vector type. 2934 // makeVectorOfOnes() requires an integer vector type.
3049 T = makeVectorOfMinusOnes(IceType_v4i32); 2935 T = makeVectorOfMinusOnes(IceType_v4i32);
3050 } else if (Condition == InstFcmp::False) { 2936 } else if (Condition == InstFcmp::False) {
3051 T = makeVectorOfZeros(Dest->getType()); 2937 T = makeVectorOfZeros(Dest->getType());
3052 } else { 2938 } else {
3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2939 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2940 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3055 if (llvm::isa<OperandX8632Mem>(Src1RM)) 2941 if (llvm::isa<OperandX8632Mem>(Src1RM))
3056 Src1RM = legalizeToVar(Src1RM); 2942 Src1RM = legalizeToVar(Src1RM);
3057 2943
3058 switch (Condition) { 2944 switch (Condition) {
3059 default: { 2945 default: {
3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; 2946 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
3061 assert(Predicate != CondX86::Cmpps_Invalid); 2947 assert(Predicate != CondX86::Cmpps_Invalid);
3062 T = makeReg(Src0RM->getType()); 2948 T = makeReg(Src0RM->getType());
3063 _movp(T, Src0RM); 2949 _movp(T, Src0RM);
3064 _cmpps(T, Src1RM, Predicate); 2950 _cmpps(T, Src1RM, Predicate);
3065 } break; 2951 } break;
3066 case InstFcmp::One: { 2952 case InstFcmp::One: {
3067 // Check both unequal and ordered. 2953 // Check both unequal and ordered.
3068 T = makeReg(Src0RM->getType()); 2954 T = makeReg(Src0RM->getType());
3069 Variable *T2 = makeReg(Src0RM->getType()); 2955 Variable *T2 = makeReg(Src0RM->getType());
3070 _movp(T, Src0RM); 2956 _movp(T, Src0RM);
(...skipping 28 matching lines...) Expand all
3099 // j<C2> label /* only if C2 != Br_None */ 2985 // j<C2> label /* only if C2 != Br_None */
3100 // FakeUse(a) /* only if C1 != Br_None */ 2986 // FakeUse(a) /* only if C1 != Br_None */
3101 // mov a, !<default> /* only if C1 != Br_None */ 2987 // mov a, !<default> /* only if C1 != Br_None */
3102 // label: /* only if C1 != Br_None */ 2988 // label: /* only if C1 != Br_None */
3103 // 2989 //
3104 // setcc lowering when C1 != Br_None && C2 == Br_None: 2990 // setcc lowering when C1 != Br_None && C2 == Br_None:
3105 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ 2991 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
3106 // setcc a, C1 2992 // setcc a, C1
3107 InstFcmp::FCond Condition = Inst->getCondition(); 2993 InstFcmp::FCond Condition = Inst->getCondition();
3108 size_t Index = static_cast<size_t>(Condition); 2994 size_t Index = static_cast<size_t>(Condition);
3109 assert(Index < TableFcmpSize); 2995 assert(Index < Traits::TableFcmpSize);
3110 if (TableFcmp[Index].SwapScalarOperands) 2996 if (Traits::TableFcmp[Index].SwapScalarOperands)
3111 std::swap(Src0, Src1); 2997 std::swap(Src0, Src1);
3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); 2998 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None);
3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); 2999 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None);
3114 if (HasC1) { 3000 if (HasC1) {
3115 Src0 = legalize(Src0); 3001 Src0 = legalize(Src0);
3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 3002 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3117 Variable *T = nullptr; 3003 Variable *T = nullptr;
3118 _mov(T, Src0); 3004 _mov(T, Src0);
3119 _ucomiss(T, Src1RM); 3005 _ucomiss(T, Src1RM);
3120 if (!HasC2) { 3006 if (!HasC2) {
3121 assert(TableFcmp[Index].Default); 3007 assert(Traits::TableFcmp[Index].Default);
3122 _setcc(Dest, TableFcmp[Index].C1); 3008 _setcc(Dest, Traits::TableFcmp[Index].C1);
3123 return; 3009 return;
3124 } 3010 }
3125 } 3011 }
3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); 3012 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
3127 _mov(Dest, Default); 3013 _mov(Dest, Default);
3128 if (HasC1) { 3014 if (HasC1) {
3129 InstX8632Label *Label = InstX8632Label::create(Func, this); 3015 InstX8632Label *Label = InstX8632Label::create(Func, this);
3130 _br(TableFcmp[Index].C1, Label); 3016 _br(Traits::TableFcmp[Index].C1, Label);
3131 if (HasC2) { 3017 if (HasC2) {
3132 _br(TableFcmp[Index].C2, Label); 3018 _br(Traits::TableFcmp[Index].C2, Label);
3133 } 3019 }
3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); 3020 Constant *NonDefault =
3021 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
3135 _mov_nonkillable(Dest, NonDefault); 3022 _mov_nonkillable(Dest, NonDefault);
3136 Context.insert(Label); 3023 Context.insert(Label);
3137 } 3024 }
3138 } 3025 }
3139 3026
3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 3027 template <class Machine>
3028 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
3141 Operand *Src0 = legalize(Inst->getSrc(0)); 3029 Operand *Src0 = legalize(Inst->getSrc(0));
3142 Operand *Src1 = legalize(Inst->getSrc(1)); 3030 Operand *Src1 = legalize(Inst->getSrc(1));
3143 Variable *Dest = Inst->getDest(); 3031 Variable *Dest = Inst->getDest();
3144 3032
3145 if (isVectorType(Dest->getType())) { 3033 if (isVectorType(Dest->getType())) {
3146 Type Ty = Src0->getType(); 3034 Type Ty = Src0->getType();
3147 // Promote i1 vectors to 128 bit integer vector types. 3035 // Promote i1 vectors to 128 bit integer vector types.
3148 if (typeElementType(Ty) == IceType_i1) { 3036 if (typeElementType(Ty) == IceType_i1) {
3149 Type NewTy = IceType_NUM; 3037 Type NewTy = IceType_NUM;
3150 switch (Ty) { 3038 switch (Ty) {
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
3248 3136
3249 _movp(Dest, T); 3137 _movp(Dest, T);
3250 eliminateNextVectorSextInstruction(Dest); 3138 eliminateNextVectorSextInstruction(Dest);
3251 return; 3139 return;
3252 } 3140 }
3253 3141
3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 3142 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
3255 if (Src0->getType() == IceType_i64) { 3143 if (Src0->getType() == IceType_i64) {
3256 InstIcmp::ICond Condition = Inst->getCondition(); 3144 InstIcmp::ICond Condition = Inst->getCondition();
3257 size_t Index = static_cast<size_t>(Condition); 3145 size_t Index = static_cast<size_t>(Condition);
3258 assert(Index < TableIcmp64Size); 3146 assert(Index < Traits::TableIcmp64Size);
3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 3147 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 3148 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 3149 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 3150 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3263 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3151 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3264 Constant *One = Ctx->getConstantInt32(1); 3152 Constant *One = Ctx->getConstantInt32(1);
3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 3153 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 3154 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
3267 _mov(Dest, One); 3155 _mov(Dest, One);
3268 _cmp(Src0HiRM, Src1HiRI); 3156 _cmp(Src0HiRM, Src1HiRI);
3269 if (TableIcmp64[Index].C1 != CondX86::Br_None) 3157 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None)
3270 _br(TableIcmp64[Index].C1, LabelTrue); 3158 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
3271 if (TableIcmp64[Index].C2 != CondX86::Br_None) 3159 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None)
3272 _br(TableIcmp64[Index].C2, LabelFalse); 3160 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3273 _cmp(Src0LoRM, Src1LoRI); 3161 _cmp(Src0LoRM, Src1LoRI);
3274 _br(TableIcmp64[Index].C3, LabelTrue); 3162 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3275 Context.insert(LabelFalse); 3163 Context.insert(LabelFalse);
3276 _mov_nonkillable(Dest, Zero); 3164 _mov_nonkillable(Dest, Zero);
3277 Context.insert(LabelTrue); 3165 Context.insert(LabelTrue);
3278 return; 3166 return;
3279 } 3167 }
3280 3168
3281 // cmp b, c 3169 // cmp b, c
3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 3170 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3283 _cmp(Src0RM, Src1); 3171 _cmp(Src0RM, Src1);
3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition())); 3172 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
3285 } 3173 }
3286 3174
3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 3175 template <class Machine>
3176 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
3288 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3177 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3178 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
3290 ConstantInteger32 *ElementIndex = 3179 ConstantInteger32 *ElementIndex =
3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3180 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
3292 // Only constant indices are allowed in PNaCl IR. 3181 // Only constant indices are allowed in PNaCl IR.
3293 assert(ElementIndex); 3182 assert(ElementIndex);
3294 unsigned Index = ElementIndex->getValue(); 3183 unsigned Index = ElementIndex->getValue();
3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 3184 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3296 3185
3297 Type Ty = SourceVectNotLegalized->getType(); 3186 Type Ty = SourceVectNotLegalized->getType();
3298 Type ElementTy = typeElementType(Ty); 3187 Type ElementTy = typeElementType(Ty);
3299 Type InVectorElementTy = getInVectorElementType(Ty); 3188 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3300 3189
3301 if (ElementTy == IceType_i1) { 3190 if (ElementTy == IceType_i1) {
3302 // Expand the element to the appropriate size for it to be inserted 3191 // Expand the element to the appropriate size for it to be inserted
3303 // in the vector. 3192 // in the vector.
3304 Variable *Expanded = Func->makeVariable(InVectorElementTy); 3193 Variable *Expanded = Func->makeVariable(InVectorElementTy);
3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 3194 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3306 ElementToInsertNotLegalized); 3195 ElementToInsertNotLegalized);
3307 lowerCast(Cast); 3196 lowerCast(Cast);
3308 ElementToInsertNotLegalized = Expanded; 3197 ElementToInsertNotLegalized = Expanded;
3309 } 3198 }
3310 3199
3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 3200 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
3201 InstructionSet >= Machine::SSE4_1) {
3312 // Use insertps, pinsrb, pinsrw, or pinsrd. 3202 // Use insertps, pinsrb, pinsrw, or pinsrd.
3313 Operand *ElementRM = 3203 Operand *ElementRM =
3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3204 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3315 Operand *SourceVectRM = 3205 Operand *SourceVectRM =
3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3206 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3317 Variable *T = makeReg(Ty); 3207 Variable *T = makeReg(Ty);
3318 _movp(T, SourceVectRM); 3208 _movp(T, SourceVectRM);
3319 if (Ty == IceType_v4f32) 3209 if (Ty == IceType_v4f32)
3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3210 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3321 else 3211 else
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
3400 OperandX8632Mem *Loc = 3290 OperandX8632Mem *Loc =
3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3291 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); 3292 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
3403 3293
3404 Variable *T = makeReg(Ty); 3294 Variable *T = makeReg(Ty);
3405 _movp(T, Slot); 3295 _movp(T, Slot);
3406 _movp(Inst->getDest(), T); 3296 _movp(Inst->getDest(), T);
3407 } 3297 }
3408 } 3298 }
3409 3299
3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 3300 template <class Machine>
3301 void TargetX86Base<Machine>::lowerIntrinsicCall(
3302 const InstIntrinsicCall *Instr) {
3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { 3303 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3412 case Intrinsics::AtomicCmpxchg: { 3304 case Intrinsics::AtomicCmpxchg: {
3413 if (!Intrinsics::isMemoryOrderValid( 3305 if (!Intrinsics::isMemoryOrderValid(
3414 ID, getConstantMemoryOrder(Instr->getArg(3)), 3306 ID, getConstantMemoryOrder(Instr->getArg(3)),
3415 getConstantMemoryOrder(Instr->getArg(4)))) { 3307 getConstantMemoryOrder(Instr->getArg(4)))) {
3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 3308 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3417 return; 3309 return;
3418 } 3310 }
3419 Variable *DestPrev = Instr->getDest(); 3311 Variable *DestPrev = Instr->getDest();
3420 Operand *PtrToMem = Instr->getArg(0); 3312 Operand *PtrToMem = Instr->getArg(0);
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
3503 Context.insert( 3395 Context.insert(
3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 3396 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3505 return; 3397 return;
3506 } 3398 }
3507 case Intrinsics::AtomicRMW: 3399 case Intrinsics::AtomicRMW:
3508 if (!Intrinsics::isMemoryOrderValid( 3400 if (!Intrinsics::isMemoryOrderValid(
3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 3401 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3510 Func->setError("Unexpected memory ordering for AtomicRMW"); 3402 Func->setError("Unexpected memory ordering for AtomicRMW");
3511 return; 3403 return;
3512 } 3404 }
3513 lowerAtomicRMW( 3405 lowerAtomicRMW(Instr->getDest(),
3514 Instr->getDest(), 3406 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
3515 static_cast<uint32_t>( 3407 Instr->getArg(0))->getValue()),
3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 3408 Instr->getArg(1), Instr->getArg(2));
3517 Instr->getArg(1), Instr->getArg(2));
3518 return; 3409 return;
3519 case Intrinsics::AtomicStore: { 3410 case Intrinsics::AtomicStore: {
3520 if (!Intrinsics::isMemoryOrderValid( 3411 if (!Intrinsics::isMemoryOrderValid(
3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 3412 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3522 Func->setError("Unexpected memory ordering for AtomicStore"); 3413 Func->setError("Unexpected memory ordering for AtomicStore");
3523 return; 3414 return;
3524 } 3415 }
3525 // We require the memory address to be naturally aligned. 3416 // We require the memory address to be naturally aligned.
3526 // Given that is the case, then normal stores are atomic. 3417 // Given that is the case, then normal stores are atomic.
3527 // Add a fence after the store to make it visible. 3418 // Add a fence after the store to make it visible.
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
3733 case Intrinsics::Trap: 3624 case Intrinsics::Trap:
3734 _ud2(); 3625 _ud2();
3735 return; 3626 return;
3736 case Intrinsics::UnknownIntrinsic: 3627 case Intrinsics::UnknownIntrinsic:
3737 Func->setError("Should not be lowering UnknownIntrinsic"); 3628 Func->setError("Should not be lowering UnknownIntrinsic");
3738 return; 3629 return;
3739 } 3630 }
3740 return; 3631 return;
3741 } 3632 }
3742 3633
3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, 3634 template <class Machine>
3744 Operand *Expected, Operand *Desired) { 3635 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3636 Operand *Ptr, Operand *Expected,
3637 Operand *Desired) {
3745 if (Expected->getType() == IceType_i64) { 3638 if (Expected->getType() == IceType_i64) {
3746 // Reserve the pre-colored registers first, before adding any more 3639 // Reserve the pre-colored registers first, before adding any more
3747 // infinite-weight variables from formMemoryOperand's legalization. 3640 // infinite-weight variables from formMemoryOperand's legalization.
3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 3641 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); 3642 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 3643 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); 3644 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
3752 _mov(T_eax, loOperand(Expected)); 3645 _mov(T_eax, loOperand(Expected));
3753 _mov(T_edx, hiOperand(Expected)); 3646 _mov(T_edx, hiOperand(Expected));
3754 _mov(T_ebx, loOperand(Desired)); 3647 _mov(T_ebx, loOperand(Desired));
3755 _mov(T_ecx, hiOperand(Desired)); 3648 _mov(T_ecx, hiOperand(Desired));
3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3649 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3757 const bool Locked = true; 3650 const bool Locked = true;
3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3651 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3652 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3653 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3761 _mov(DestLo, T_eax); 3654 _mov(DestLo, T_eax);
3762 _mov(DestHi, T_edx); 3655 _mov(DestHi, T_edx);
3763 return; 3656 return;
3764 } 3657 }
3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); 3658 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
3766 _mov(T_eax, Expected); 3659 _mov(T_eax, Expected);
3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3660 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3768 Variable *DesiredReg = legalizeToVar(Desired); 3661 Variable *DesiredReg = legalizeToVar(Desired);
3769 const bool Locked = true; 3662 const bool Locked = true;
3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3663 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3771 _mov(DestPrev, T_eax); 3664 _mov(DestPrev, T_eax);
3772 } 3665 }
3773 3666
3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, 3667 template <class Machine>
3775 Operand *Expected, 3668 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3776 Operand *Desired) { 3669 Operand *PtrToMem,
3670 Operand *Expected,
3671 Operand *Desired) {
3777 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3672 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3778 return false; 3673 return false;
3779 // Peek ahead a few instructions and see how Dest is used. 3674 // Peek ahead a few instructions and see how Dest is used.
3780 // It's very common to have: 3675 // It's very common to have:
3781 // 3676 //
3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) 3677 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3783 // [%y_phi = ...] // list of phi stores 3678 // [%y_phi = ...] // list of phi stores
3784 // %p = icmp eq i32 %x, %expected 3679 // %p = icmp eq i32 %x, %expected
3785 // br i1 %p, label %l1, label %l2 3680 // br i1 %p, label %l1, label %l2
3786 // 3681 //
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
3837 NextBr->setDeleted(); 3732 NextBr->setDeleted();
3838 Context.advanceNext(); 3733 Context.advanceNext();
3839 Context.advanceNext(); 3734 Context.advanceNext();
3840 return true; 3735 return true;
3841 } 3736 }
3842 } 3737 }
3843 } 3738 }
3844 return false; 3739 return false;
3845 } 3740 }
3846 3741
3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3742 template <class Machine>
3848 Operand *Ptr, Operand *Val) { 3743 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3744 Operand *Ptr, Operand *Val) {
3849 bool NeedsCmpxchg = false; 3745 bool NeedsCmpxchg = false;
3850 LowerBinOp Op_Lo = nullptr; 3746 LowerBinOp Op_Lo = nullptr;
3851 LowerBinOp Op_Hi = nullptr; 3747 LowerBinOp Op_Hi = nullptr;
3852 switch (Operation) { 3748 switch (Operation) {
3853 default: 3749 default:
3854 Func->setError("Unknown AtomicRMW operation"); 3750 Func->setError("Unknown AtomicRMW operation");
3855 return; 3751 return;
3856 case Intrinsics::AtomicAdd: { 3752 case Intrinsics::AtomicAdd: {
3857 if (Dest->getType() == IceType_i64) { 3753 if (Dest->getType() == IceType_i64) {
3858 // All the fall-through paths must set this to true, but use this 3754 // All the fall-through paths must set this to true, but use this
3859 // for asserting. 3755 // for asserting.
3860 NeedsCmpxchg = true; 3756 NeedsCmpxchg = true;
3861 Op_Lo = &TargetX8632::_add; 3757 Op_Lo = &TargetX86Base<Machine>::_add;
3862 Op_Hi = &TargetX8632::_adc; 3758 Op_Hi = &TargetX86Base<Machine>::_adc;
3863 break; 3759 break;
3864 } 3760 }
3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3761 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3866 const bool Locked = true; 3762 const bool Locked = true;
3867 Variable *T = nullptr; 3763 Variable *T = nullptr;
3868 _mov(T, Val); 3764 _mov(T, Val);
3869 _xadd(Addr, T, Locked); 3765 _xadd(Addr, T, Locked);
3870 _mov(Dest, T); 3766 _mov(Dest, T);
3871 return; 3767 return;
3872 } 3768 }
3873 case Intrinsics::AtomicSub: { 3769 case Intrinsics::AtomicSub: {
3874 if (Dest->getType() == IceType_i64) { 3770 if (Dest->getType() == IceType_i64) {
3875 NeedsCmpxchg = true; 3771 NeedsCmpxchg = true;
3876 Op_Lo = &TargetX8632::_sub; 3772 Op_Lo = &TargetX86Base<Machine>::_sub;
3877 Op_Hi = &TargetX8632::_sbb; 3773 Op_Hi = &TargetX86Base<Machine>::_sbb;
3878 break; 3774 break;
3879 } 3775 }
3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3776 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3881 const bool Locked = true; 3777 const bool Locked = true;
3882 Variable *T = nullptr; 3778 Variable *T = nullptr;
3883 _mov(T, Val); 3779 _mov(T, Val);
3884 _neg(T); 3780 _neg(T);
3885 _xadd(Addr, T, Locked); 3781 _xadd(Addr, T, Locked);
3886 _mov(Dest, T); 3782 _mov(Dest, T);
3887 return; 3783 return;
3888 } 3784 }
3889 case Intrinsics::AtomicOr: 3785 case Intrinsics::AtomicOr:
3890 // TODO(jvoung): If Dest is null or dead, then some of these 3786 // TODO(jvoung): If Dest is null or dead, then some of these
3891 // operations do not need an "exchange", but just a locked op. 3787 // operations do not need an "exchange", but just a locked op.
3892 // That appears to be "worth" it for sub, or, and, and xor. 3788 // That appears to be "worth" it for sub, or, and, and xor.
3893 // xadd is probably fine vs lock add for add, and xchg is fine 3789 // xadd is probably fine vs lock add for add, and xchg is fine
3894 // vs an atomic store. 3790 // vs an atomic store.
3895 NeedsCmpxchg = true; 3791 NeedsCmpxchg = true;
3896 Op_Lo = &TargetX8632::_or; 3792 Op_Lo = &TargetX86Base<Machine>::_or;
3897 Op_Hi = &TargetX8632::_or; 3793 Op_Hi = &TargetX86Base<Machine>::_or;
3898 break; 3794 break;
3899 case Intrinsics::AtomicAnd: 3795 case Intrinsics::AtomicAnd:
3900 NeedsCmpxchg = true; 3796 NeedsCmpxchg = true;
3901 Op_Lo = &TargetX8632::_and; 3797 Op_Lo = &TargetX86Base<Machine>::_and;
3902 Op_Hi = &TargetX8632::_and; 3798 Op_Hi = &TargetX86Base<Machine>::_and;
3903 break; 3799 break;
3904 case Intrinsics::AtomicXor: 3800 case Intrinsics::AtomicXor:
3905 NeedsCmpxchg = true; 3801 NeedsCmpxchg = true;
3906 Op_Lo = &TargetX8632::_xor; 3802 Op_Lo = &TargetX86Base<Machine>::_xor;
3907 Op_Hi = &TargetX8632::_xor; 3803 Op_Hi = &TargetX86Base<Machine>::_xor;
3908 break; 3804 break;
3909 case Intrinsics::AtomicExchange: 3805 case Intrinsics::AtomicExchange:
3910 if (Dest->getType() == IceType_i64) { 3806 if (Dest->getType() == IceType_i64) {
3911 NeedsCmpxchg = true; 3807 NeedsCmpxchg = true;
3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3808 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3913 // just need to be moved to the ecx and ebx registers. 3809 // just need to be moved to the ecx and ebx registers.
3914 Op_Lo = nullptr; 3810 Op_Lo = nullptr;
3915 Op_Hi = nullptr; 3811 Op_Hi = nullptr;
3916 break; 3812 break;
3917 } 3813 }
3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3814 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3919 Variable *T = nullptr; 3815 Variable *T = nullptr;
3920 _mov(T, Val); 3816 _mov(T, Val);
3921 _xchg(Addr, T); 3817 _xchg(Addr, T);
3922 _mov(Dest, T); 3818 _mov(Dest, T);
3923 return; 3819 return;
3924 } 3820 }
3925 // Otherwise, we need a cmpxchg loop. 3821 // Otherwise, we need a cmpxchg loop.
3926 (void)NeedsCmpxchg; 3822 (void)NeedsCmpxchg;
3927 assert(NeedsCmpxchg); 3823 assert(NeedsCmpxchg);
3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); 3824 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3929 } 3825 }
3930 3826
3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, 3827 template <class Machine>
3932 Variable *Dest, Operand *Ptr, 3828 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3933 Operand *Val) { 3829 LowerBinOp Op_Hi,
3830 Variable *Dest,
3831 Operand *Ptr,
3832 Operand *Val) {
3934 // Expand a more complex RMW operation as a cmpxchg loop: 3833 // Expand a more complex RMW operation as a cmpxchg loop:
3935 // For 64-bit: 3834 // For 64-bit:
3936 // mov eax, [ptr] 3835 // mov eax, [ptr]
3937 // mov edx, [ptr + 4] 3836 // mov edx, [ptr + 4]
3938 // .LABEL: 3837 // .LABEL:
3939 // mov ebx, eax 3838 // mov ebx, eax
3940 // <Op_Lo> ebx, <desired_adj_lo> 3839 // <Op_Lo> ebx, <desired_adj_lo>
3941 // mov ecx, edx 3840 // mov ecx, edx
3942 // <Op_Hi> ecx, <desired_adj_hi> 3841 // <Op_Hi> ecx, <desired_adj_hi>
3943 // lock cmpxchg8b [ptr] 3842 // lock cmpxchg8b [ptr]
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
4028 // The address base (if any) is also reused in the loop. 3927 // The address base (if any) is also reused in the loop.
4029 if (Variable *Base = Addr->getBase()) 3928 if (Variable *Base = Addr->getBase())
4030 Context.insert(InstFakeUse::create(Func, Base)); 3929 Context.insert(InstFakeUse::create(Func, Base));
4031 _mov(Dest, T_eax); 3930 _mov(Dest, T_eax);
4032 } 3931 }
4033 3932
4034 // Lowers count {trailing, leading} zeros intrinsic. 3933 // Lowers count {trailing, leading} zeros intrinsic.
4035 // 3934 //
4036 // We could do constant folding here, but that should have 3935 // We could do constant folding here, but that should have
4037 // been done by the front-end/middle-end optimizations. 3936 // been done by the front-end/middle-end optimizations.
4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, 3937 template <class Machine>
4039 Operand *FirstVal, Operand *SecondVal) { 3938 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3939 Operand *FirstVal,
3940 Operand *SecondVal) {
4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). 3941 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
4041 // Then the instructions will handle the Val == 0 case much more simply 3942 // Then the instructions will handle the Val == 0 case much more simply
4042 // and won't require conversion from bit position to number of zeros. 3943 // and won't require conversion from bit position to number of zeros.
4043 // 3944 //
4044 // Otherwise: 3945 // Otherwise:
4045 // bsr IF_NOT_ZERO, Val 3946 // bsr IF_NOT_ZERO, Val
4046 // mov T_DEST, 63 3947 // mov T_DEST, 63
4047 // cmovne T_DEST, IF_NOT_ZERO 3948 // cmovne T_DEST, IF_NOT_ZERO
4048 // xor T_DEST, 31 3949 // xor T_DEST, 31
4049 // mov DEST, T_DEST 3950 // mov DEST, T_DEST
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
4100 } else { 4001 } else {
4101 _bsr(T_Dest2, SecondVar); 4002 _bsr(T_Dest2, SecondVar);
4102 _xor(T_Dest2, ThirtyOne); 4003 _xor(T_Dest2, ThirtyOne);
4103 } 4004 }
4104 _test(SecondVar, SecondVar); 4005 _test(SecondVar, SecondVar);
4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e); 4006 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
4106 _mov(DestLo, T_Dest2); 4007 _mov(DestLo, T_Dest2);
4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 4008 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
4108 } 4009 }
4109 4010
4110 namespace {
4111
4112 bool isAdd(const Inst *Inst) { 4011 bool isAdd(const Inst *Inst) {
4113 if (const InstArithmetic *Arith = 4012 if (const InstArithmetic *Arith =
4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 4013 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
4115 return (Arith->getOp() == InstArithmetic::Add); 4014 return (Arith->getOp() == InstArithmetic::Add);
4116 } 4015 }
4117 return false; 4016 return false;
4118 } 4017 }
4119 4018
4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base, 4019 void dumpAddressOpt(const Cfg *Func, const Variable *Base,
4121 const Variable *Index, uint16_t Shift, int32_t Offset, 4020 const Variable *Index, uint16_t Shift, int32_t Offset,
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
4342 // set Index=Var, Offset+=(Const<<Shift) 4241 // set Index=Var, Offset+=(Const<<Shift)
4343 4242
4344 // Index is Index=Var-Const ==> 4243 // Index is Index=Var-Const ==>
4345 // set Index=Var, Offset-=(Const<<Shift) 4244 // set Index=Var, Offset-=(Const<<Shift)
4346 4245
4347 // TODO: consider overflow issues with respect to Offset. 4246 // TODO: consider overflow issues with respect to Offset.
4348 // TODO: handle symbolic constants. 4247 // TODO: handle symbolic constants.
4349 } 4248 }
4350 } 4249 }
4351 4250
4352 } // anonymous namespace 4251 template <class Machine>
4353 4252 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
4354 void TargetX8632::lowerLoad(const InstLoad *Load) {
4355 // A Load instruction can be treated the same as an Assign 4253 // A Load instruction can be treated the same as an Assign
4356 // instruction, after the source operand is transformed into an 4254 // instruction, after the source operand is transformed into an
4357 // OperandX8632Mem operand. Note that the address mode 4255 // OperandX8632Mem operand. Note that the address mode
4358 // optimization already creates an OperandX8632Mem operand, so it 4256 // optimization already creates an OperandX8632Mem operand, so it
4359 // doesn't need another level of transformation. 4257 // doesn't need another level of transformation.
4360 Variable *DestLoad = Load->getDest(); 4258 Variable *DestLoad = Load->getDest();
4361 Type Ty = DestLoad->getType(); 4259 Type Ty = DestLoad->getType();
4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4260 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4261 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
4364 lowerAssign(Assign); 4262 lowerAssign(Assign);
4365 } 4263 }
4366 4264
4367 void TargetX8632::doAddressOptLoad() { 4265 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4368 Inst *Inst = Context.getCur(); 4266 Inst *Inst = Context.getCur();
4369 Variable *Dest = Inst->getDest(); 4267 Variable *Dest = Inst->getDest();
4370 Operand *Addr = Inst->getSrc(0); 4268 Operand *Addr = Inst->getSrc(0);
4371 Variable *Index = nullptr; 4269 Variable *Index = nullptr;
4372 uint16_t Shift = 0; 4270 uint16_t Shift = 0;
4373 int32_t Offset = 0; // TODO: make Constant 4271 int32_t Offset = 0; // TODO: make Constant
4374 // Vanilla ICE load instructions should not use the segment registers, 4272 // Vanilla ICE load instructions should not use the segment registers,
4375 // and computeAddressOpt only works at the level of Variables and Constants, 4273 // and computeAddressOpt only works at the level of Variables and Constants,
4376 // not other OperandX8632Mem, so there should be no mention of segment 4274 // not other OperandX8632Mem, so there should be no mention of segment
4377 // registers there either. 4275 // registers there either.
4378 const OperandX8632Mem::SegmentRegisters SegmentReg = 4276 const OperandX8632Mem::SegmentRegisters SegmentReg =
4379 OperandX8632Mem::DefaultSegment; 4277 OperandX8632Mem::DefaultSegment;
4380 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4278 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4279 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4382 if (Base && Addr != Base) { 4280 if (Base && Addr != Base) {
4383 Inst->setDeleted(); 4281 Inst->setDeleted();
4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4282 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 4283 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
4386 Shift, SegmentReg); 4284 Shift, SegmentReg);
4387 Context.insert(InstLoad::create(Func, Dest, Addr)); 4285 Context.insert(InstLoad::create(Func, Dest, Addr));
4388 } 4286 }
4389 } 4287 }
4390 4288
4391 void TargetX8632::randomlyInsertNop(float Probability) { 4289 template <class Machine>
4290 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 4291 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4393 if (RNG.getTrueWithProbability(Probability)) { 4292 if (RNG.getTrueWithProbability(Probability)) {
4394 _nop(RNG(X86_NUM_NOP_VARIANTS)); 4293 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
4395 } 4294 }
4396 } 4295 }
4397 4296
4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { 4297 template <class Machine>
4298 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4399 Func->setError("Phi found in regular instruction list"); 4299 Func->setError("Phi found in regular instruction list");
4400 } 4300 }
4401 4301
4402 void TargetX8632::lowerRet(const InstRet *Inst) { 4302 template <class Machine>
4303 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
4403 Variable *Reg = nullptr; 4304 Variable *Reg = nullptr;
4404 if (Inst->hasRetValue()) { 4305 if (Inst->hasRetValue()) {
4405 Operand *Src0 = legalize(Inst->getRetValue()); 4306 Operand *Src0 = legalize(Inst->getRetValue());
4406 if (Src0->getType() == IceType_i64) { 4307 if (Src0->getType() == IceType_i64) {
4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); 4308 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); 4309 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
4409 Reg = eax; 4310 Reg = eax;
4410 Context.insert(InstFakeUse::create(Func, edx)); 4311 Context.insert(InstFakeUse::create(Func, edx));
4411 } else if (isScalarFloatingType(Src0->getType())) { 4312 } else if (isScalarFloatingType(Src0->getType())) {
4412 _fld(Src0); 4313 _fld(Src0);
4413 } else if (isVectorType(Src0->getType())) { 4314 } else if (isVectorType(Src0->getType())) {
4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); 4315 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
4415 } else { 4316 } else {
4416 _mov(Reg, Src0, RegX8632::Reg_eax); 4317 _mov(Reg, Src0, RegX8632::Reg_eax);
4417 } 4318 }
4418 } 4319 }
4419 // Add a ret instruction even if sandboxing is enabled, because 4320 // Add a ret instruction even if sandboxing is enabled, because
4420 // addEpilog explicitly looks for a ret instruction as a marker for 4321 // addEpilog explicitly looks for a ret instruction as a marker for
4421 // where to insert the frame removal instructions. 4322 // where to insert the frame removal instructions.
4422 _ret(Reg); 4323 _ret(Reg);
4423 // Add a fake use of esp to make sure esp stays alive for the entire 4324 // Add a fake use of esp to make sure esp stays alive for the entire
4424 // function. Otherwise post-call esp adjustments get dead-code 4325 // function. Otherwise post-call esp adjustments get dead-code
4425 // eliminated. TODO: Are there more places where the fake use 4326 // eliminated. TODO: Are there more places where the fake use
4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 4327 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
4427 // have a ret instruction. 4328 // have a ret instruction.
4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 4329 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
4429 Context.insert(InstFakeUse::create(Func, esp)); 4330 Context.insert(InstFakeUse::create(Func, esp));
4430 } 4331 }
4431 4332
4432 void TargetX8632::lowerSelect(const InstSelect *Inst) { 4333 template <class Machine>
4334 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
4433 Variable *Dest = Inst->getDest(); 4335 Variable *Dest = Inst->getDest();
4434 Type DestTy = Dest->getType(); 4336 Type DestTy = Dest->getType();
4435 Operand *SrcT = Inst->getTrueOperand(); 4337 Operand *SrcT = Inst->getTrueOperand();
4436 Operand *SrcF = Inst->getFalseOperand(); 4338 Operand *SrcF = Inst->getFalseOperand();
4437 Operand *Condition = Inst->getCondition(); 4339 Operand *Condition = Inst->getCondition();
4438 4340
4439 if (isVectorType(DestTy)) { 4341 if (isVectorType(DestTy)) {
4440 Type SrcTy = SrcT->getType(); 4342 Type SrcTy = SrcT->getType();
4441 Variable *T = makeReg(SrcTy); 4343 Variable *T = makeReg(SrcTy);
4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 4344 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 4345 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4444 if (InstructionSet >= SSE4_1) { 4346 if (InstructionSet >= Machine::SSE4_1) {
4445 // TODO(wala): If the condition operand is a constant, use blendps 4347 // TODO(wala): If the condition operand is a constant, use blendps
4446 // or pblendw. 4348 // or pblendw.
4447 // 4349 //
4448 // Use blendvps or pblendvb to implement select. 4350 // Use blendvps or pblendvb to implement select.
4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 4351 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4450 SrcTy == IceType_v4f32) { 4352 SrcTy == IceType_v4f32) {
4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4353 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 4354 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
4453 _movp(xmm0, ConditionRM); 4355 _movp(xmm0, ConditionRM);
4454 _psll(xmm0, Ctx->getConstantInt8(31)); 4356 _psll(xmm0, Ctx->getConstantInt8(31));
4455 _movp(T, SrcFRM); 4357 _movp(T, SrcFRM);
4456 _blendvps(T, SrcTRM, xmm0); 4358 _blendvps(T, SrcTRM, xmm0);
4457 _movp(Dest, T); 4359 _movp(Dest, T);
4458 } else { 4360 } else {
4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 4361 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 4362 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4461 : IceType_v16i8; 4363 : IceType_v16i8;
4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 4364 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 4365 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4464 _movp(T, SrcFRM); 4366 _movp(T, SrcFRM);
4465 _pblendvb(T, SrcTRM, xmm0); 4367 _pblendvb(T, SrcTRM, xmm0);
4466 _movp(Dest, T); 4368 _movp(Dest, T);
4467 } 4369 }
4468 return; 4370 return;
4469 } 4371 }
4470 // Lower select without SSE4.1: 4372 // Lower select without SSE4.1:
4471 // a=d?b:c ==> 4373 // a=d?b:c ==>
4472 // if elementtype(d) != i1: 4374 // if elementtype(d) != i1:
4473 // d=sext(d); 4375 // d=sext(d);
4474 // a=(b&d)|(c&~d); 4376 // a=(b&d)|(c&~d);
4475 Variable *T2 = makeReg(SrcTy); 4377 Variable *T2 = makeReg(SrcTy);
4476 // Sign extend the condition operand if applicable. 4378 // Sign extend the condition operand if applicable.
4477 if (SrcTy == IceType_v4f32) { 4379 if (SrcTy == IceType_v4f32) {
4478 // The sext operation takes only integer arguments. 4380 // The sext operation takes only integer arguments.
4479 Variable *T3 = Func->makeVariable(IceType_v4i32); 4381 Variable *T3 = Func->makeVariable(IceType_v4i32);
4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 4382 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
(...skipping 17 matching lines...) Expand all
4498 Operand *CmpOpnd0 = nullptr; 4400 Operand *CmpOpnd0 = nullptr;
4499 Operand *CmpOpnd1 = nullptr; 4401 Operand *CmpOpnd1 = nullptr;
4500 // Handle folding opportunities. 4402 // Handle folding opportunities.
4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4403 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4502 assert(Producer->isDeleted()); 4404 assert(Producer->isDeleted());
4503 switch (BoolFolding::getProducerKind(Producer)) { 4405 switch (BoolFolding::getProducerKind(Producer)) {
4504 default: 4406 default:
4505 break; 4407 break;
4506 case BoolFolding::PK_Icmp32: { 4408 case BoolFolding::PK_Icmp32: {
4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4409 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4508 Cond = getIcmp32Mapping(Cmp->getCondition()); 4410 Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
4509 CmpOpnd1 = legalize(Producer->getSrc(1)); 4411 CmpOpnd1 = legalize(Producer->getSrc(1));
4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); 4412 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
4511 } break; 4413 } break;
4512 } 4414 }
4513 } 4415 }
4514 if (CmpOpnd0 == nullptr) { 4416 if (CmpOpnd0 == nullptr) {
4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); 4417 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 4418 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4517 } 4419 }
4518 assert(CmpOpnd0); 4420 assert(CmpOpnd0);
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
4562 4464
4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4465 assert(DestTy == IceType_i16 || DestTy == IceType_i32);
4564 Variable *T = nullptr; 4466 Variable *T = nullptr;
4565 SrcF = legalize(SrcF); 4467 SrcF = legalize(SrcF);
4566 _mov(T, SrcF); 4468 _mov(T, SrcF);
4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4469 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4568 _cmov(T, SrcT, Cond); 4470 _cmov(T, SrcT, Cond);
4569 _mov(Dest, T); 4471 _mov(Dest, T);
4570 } 4472 }
4571 4473
4572 void TargetX8632::lowerStore(const InstStore *Inst) { 4474 template <class Machine>
4475 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4573 Operand *Value = Inst->getData(); 4476 Operand *Value = Inst->getData();
4574 Operand *Addr = Inst->getAddr(); 4477 Operand *Addr = Inst->getAddr();
4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 4478 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
4576 Type Ty = NewAddr->getType(); 4479 Type Ty = NewAddr->getType();
4577 4480
4578 if (Ty == IceType_i64) { 4481 if (Ty == IceType_i64) {
4579 Value = legalize(Value); 4482 Value = legalize(Value);
4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4483 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4484 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); 4485 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); 4486 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
4584 } else if (isVectorType(Ty)) { 4487 } else if (isVectorType(Ty)) {
4585 _storep(legalizeToVar(Value), NewAddr); 4488 _storep(legalizeToVar(Value), NewAddr);
4586 } else { 4489 } else {
4587 Value = legalize(Value, Legal_Reg | Legal_Imm); 4490 Value = legalize(Value, Legal_Reg | Legal_Imm);
4588 _store(Value, NewAddr); 4491 _store(Value, NewAddr);
4589 } 4492 }
4590 } 4493 }
4591 4494
4592 void TargetX8632::doAddressOptStore() { 4495 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4496 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4594 Operand *Data = Inst->getData(); 4497 Operand *Data = Inst->getData();
4595 Operand *Addr = Inst->getAddr(); 4498 Operand *Addr = Inst->getAddr();
4596 Variable *Index = nullptr; 4499 Variable *Index = nullptr;
4597 uint16_t Shift = 0; 4500 uint16_t Shift = 0;
4598 int32_t Offset = 0; // TODO: make Constant 4501 int32_t Offset = 0; // TODO: make Constant
4599 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4502 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4600 // Vanilla ICE store instructions should not use the segment registers, 4503 // Vanilla ICE store instructions should not use the segment registers,
4601 // and computeAddressOpt only works at the level of Variables and Constants, 4504 // and computeAddressOpt only works at the level of Variables and Constants,
4602 // not other OperandX8632Mem, so there should be no mention of segment 4505 // not other OperandX8632Mem, so there should be no mention of segment
4603 // registers there either. 4506 // registers there either.
4604 const OperandX8632Mem::SegmentRegisters SegmentReg = 4507 const OperandX8632Mem::SegmentRegisters SegmentReg =
4605 OperandX8632Mem::DefaultSegment; 4508 OperandX8632Mem::DefaultSegment;
4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4509 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4607 if (Base && Addr != Base) { 4510 if (Base && Addr != Base) {
4608 Inst->setDeleted(); 4511 Inst->setDeleted();
4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4512 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4513 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4611 Shift, SegmentReg); 4514 Shift, SegmentReg);
4612 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4515 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4613 if (Inst->getDest()) 4516 if (Inst->getDest())
4614 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4517 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4615 Context.insert(NewStore); 4518 Context.insert(NewStore);
4616 } 4519 }
4617 } 4520 }
4618 4521
4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4522 template <class Machine>
4523 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4620 // This implements the most naive possible lowering. 4524 // This implements the most naive possible lowering.
4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4525 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4622 Operand *Src0 = Inst->getComparison(); 4526 Operand *Src0 = Inst->getComparison();
4623 SizeT NumCases = Inst->getNumCases(); 4527 SizeT NumCases = Inst->getNumCases();
4624 if (Src0->getType() == IceType_i64) { 4528 if (Src0->getType() == IceType_i64) {
4625 Src0 = legalize(Src0); // get Base/Index into physical registers 4529 Src0 = legalize(Src0); // get Base/Index into physical registers
4626 Operand *Src0Lo = loOperand(Src0); 4530 Operand *Src0Lo = loOperand(Src0);
4627 Operand *Src0Hi = hiOperand(Src0); 4531 Operand *Src0Hi = hiOperand(Src0);
4628 if (NumCases >= 2) { 4532 if (NumCases >= 2) {
4629 Src0Lo = legalizeToVar(Src0Lo); 4533 Src0Lo = legalizeToVar(Src0Lo);
(...skipping 23 matching lines...) Expand all
4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4557 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4654 for (SizeT I = 0; I < NumCases; ++I) { 4558 for (SizeT I = 0; I < NumCases; ++I) {
4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); 4559 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4656 _cmp(Src0, Value); 4560 _cmp(Src0, Value);
4657 _br(CondX86::Br_e, Inst->getLabel(I)); 4561 _br(CondX86::Br_e, Inst->getLabel(I));
4658 } 4562 }
4659 4563
4660 _br(Inst->getLabelDefault()); 4564 _br(Inst->getLabelDefault());
4661 } 4565 }
4662 4566
4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4567 template <class Machine>
4664 Variable *Dest, Operand *Src0, 4568 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4665 Operand *Src1) { 4569 Variable *Dest, Operand *Src0,
4570 Operand *Src1) {
4666 assert(isVectorType(Dest->getType())); 4571 assert(isVectorType(Dest->getType()));
4667 Type Ty = Dest->getType(); 4572 Type Ty = Dest->getType();
4668 Type ElementTy = typeElementType(Ty); 4573 Type ElementTy = typeElementType(Ty);
4669 SizeT NumElements = typeNumElements(Ty); 4574 SizeT NumElements = typeNumElements(Ty);
4670 4575
4671 Operand *T = Ctx->getConstantUndef(Ty); 4576 Operand *T = Ctx->getConstantUndef(Ty);
4672 for (SizeT I = 0; I < NumElements; ++I) { 4577 for (SizeT I = 0; I < NumElements; ++I) {
4673 Constant *Index = Ctx->getConstantInt32(I); 4578 Constant *Index = Ctx->getConstantInt32(I);
4674 4579
4675 // Extract the next two inputs. 4580 // Extract the next two inputs.
(...skipping 16 matching lines...) Expand all
4692 } 4597 }
4693 4598
4694 // The following pattern occurs often in lowered C and C++ code: 4599 // The following pattern occurs often in lowered C and C++ code:
4695 // 4600 //
4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4601 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4602 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
4698 // 4603 //
4699 // We can eliminate the sext operation by copying the result of pcmpeqd, 4604 // We can eliminate the sext operation by copying the result of pcmpeqd,
4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result 4605 // pcmpgtd, or cmpps (which produce sign extended results) to the result
4701 // of the sext operation. 4606 // of the sext operation.
4702 void TargetX8632::eliminateNextVectorSextInstruction( 4607 template <class Machine>
4608 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
4703 Variable *SignExtendedResult) { 4609 Variable *SignExtendedResult) {
4704 if (InstCast *NextCast = 4610 if (InstCast *NextCast =
4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 4611 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4706 if (NextCast->getCastKind() == InstCast::Sext && 4612 if (NextCast->getCastKind() == InstCast::Sext &&
4707 NextCast->getSrc(0) == SignExtendedResult) { 4613 NextCast->getSrc(0) == SignExtendedResult) {
4708 NextCast->setDeleted(); 4614 NextCast->setDeleted();
4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4615 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4710 // Skip over the instruction. 4616 // Skip over the instruction.
4711 Context.advanceNext(); 4617 Context.advanceNext();
4712 } 4618 }
4713 } 4619 }
4714 } 4620 }
4715 4621
4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } 4622 template <class Machine>
4623 void TargetX86Base<Machine>::lowerUnreachable(
4624 const InstUnreachable * /*Inst*/) {
4625 _ud2();
4626 }
4717 4627
4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { 4628 template <class Machine>
4629 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) {
4719 // If the beacon variable's live range does not end in this 4630 // If the beacon variable's live range does not end in this
4720 // instruction, then it must end in the modified Store instruction 4631 // instruction, then it must end in the modified Store instruction
4721 // that follows. This means that the original Store instruction is 4632 // that follows. This means that the original Store instruction is
4722 // still there, either because the value being stored is used beyond 4633 // still there, either because the value being stored is used beyond
4723 // the Store instruction, or because dead code elimination did not 4634 // the Store instruction, or because dead code elimination did not
4724 // happen. In either case, we cancel RMW lowering (and the caller 4635 // happen. In either case, we cancel RMW lowering (and the caller
4725 // deletes the RMW instruction). 4636 // deletes the RMW instruction).
4726 if (!RMW->isLastUse(RMW->getBeacon())) 4637 if (!RMW->isLastUse(RMW->getBeacon()))
4727 return; 4638 return;
4728 Operand *Src = RMW->getData(); 4639 Operand *Src = RMW->getData();
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
4782 return; 4693 return;
4783 case InstArithmetic::Xor: 4694 case InstArithmetic::Xor:
4784 Src = legalize(Src, Legal_Reg | Legal_Imm); 4695 Src = legalize(Src, Legal_Reg | Legal_Imm);
4785 _xor_rmw(Addr, Src); 4696 _xor_rmw(Addr, Src);
4786 return; 4697 return;
4787 } 4698 }
4788 } 4699 }
4789 llvm::report_fatal_error("Couldn't lower RMW instruction"); 4700 llvm::report_fatal_error("Couldn't lower RMW instruction");
4790 } 4701 }
4791 4702
4792 void TargetX8632::lowerOther(const Inst *Instr) { 4703 template <class Machine>
4704 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4705 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4794 lowerRMW(RMW); 4706 lowerRMW(RMW);
4795 } else { 4707 } else {
4796 TargetLowering::lowerOther(Instr); 4708 TargetLowering::lowerOther(Instr);
4797 } 4709 }
4798 } 4710 }
4799 4711
4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4712 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4801 // preserve integrity of liveness analysis. Undef values are also 4713 // preserve integrity of liveness analysis. Undef values are also
4802 // turned into zeroes, since loOperand() and hiOperand() don't expect 4714 // turned into zeroes, since loOperand() and hiOperand() don't expect
4803 // Undef input. 4715 // Undef input.
4804 void TargetX8632::prelowerPhis() { 4716 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4805 // Pause constant blinding or pooling, blinding or pooling will be done later 4717 // Pause constant blinding or pooling, blinding or pooling will be done later
4806 // during phi lowering assignments 4718 // during phi lowering assignments
4807 BoolFlagSaver B(RandomizationPoolingPaused, true); 4719 BoolFlagSaver B(RandomizationPoolingPaused, true);
4808 4720
4809 CfgNode *Node = Context.getNode(); 4721 CfgNode *Node = Context.getNode();
4810 for (Inst &I : Node->getPhis()) { 4722 for (Inst &I : Node->getPhis()) {
4811 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4723 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4812 if (Phi->isDeleted()) 4724 if (Phi->isDeleted())
4813 continue; 4725 continue;
4814 Variable *Dest = Phi->getDest(); 4726 Variable *Dest = Phi->getDest();
(...skipping 10 matching lines...) Expand all
4825 PhiLo->addArgument(loOperand(Src), Label); 4737 PhiLo->addArgument(loOperand(Src), Label);
4826 PhiHi->addArgument(hiOperand(Src), Label); 4738 PhiHi->addArgument(hiOperand(Src), Label);
4827 } 4739 }
4828 Node->getPhis().push_back(PhiLo); 4740 Node->getPhis().push_back(PhiLo);
4829 Node->getPhis().push_back(PhiHi); 4741 Node->getPhis().push_back(PhiHi);
4830 Phi->setDeleted(); 4742 Phi->setDeleted();
4831 } 4743 }
4832 } 4744 }
4833 } 4745 }
4834 4746
4835 namespace {
4836
4837 bool isMemoryOperand(const Operand *Opnd) { 4747 bool isMemoryOperand(const Operand *Opnd) {
4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) 4748 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4839 return !Var->hasReg(); 4749 return !Var->hasReg();
4840 // We treat vector undef values the same as a memory operand, 4750 // We treat vector undef values the same as a memory operand,
4841 // because they do in fact need a register to materialize the vector 4751 // because they do in fact need a register to materialize the vector
4842 // of zeroes into. 4752 // of zeroes into.
4843 if (llvm::isa<ConstantUndef>(Opnd)) 4753 if (llvm::isa<ConstantUndef>(Opnd))
4844 return isScalarFloatingType(Opnd->getType()) || 4754 return isScalarFloatingType(Opnd->getType()) ||
4845 isVectorType(Opnd->getType()); 4755 isVectorType(Opnd->getType());
4846 if (llvm::isa<Constant>(Opnd)) 4756 if (llvm::isa<Constant>(Opnd))
4847 return isScalarFloatingType(Opnd->getType()); 4757 return isScalarFloatingType(Opnd->getType());
4848 return true; 4758 return true;
4849 } 4759 }
4850 4760
4851 } // end of anonymous namespace
4852
4853 // Lower the pre-ordered list of assignments into mov instructions. 4761 // Lower the pre-ordered list of assignments into mov instructions.
4854 // Also has to do some ad-hoc register allocation as necessary. 4762 // Also has to do some ad-hoc register allocation as necessary.
4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node, 4763 template <class Machine>
4856 const AssignList &Assignments) { 4764 void TargetX86Base<Machine>::lowerPhiAssignments(
4765 CfgNode *Node, const AssignList &Assignments) {
4857 // Check that this is a properly initialized shell of a node. 4766 // Check that this is a properly initialized shell of a node.
4858 assert(Node->getOutEdges().size() == 1); 4767 assert(Node->getOutEdges().size() == 1);
4859 assert(Node->getInsts().empty()); 4768 assert(Node->getInsts().empty());
4860 assert(Node->getPhis().empty()); 4769 assert(Node->getPhis().empty());
4861 CfgNode *Succ = Node->getOutEdges().front(); 4770 CfgNode *Succ = Node->getOutEdges().front();
4862 getContext().init(Node); 4771 getContext().init(Node);
4863 // Register set setup similar to regAlloc(). 4772 // Register set setup similar to regAlloc().
4864 RegSetMask RegInclude = RegSet_All; 4773 RegSetMask RegInclude = RegSet_All;
4865 RegSetMask RegExclude = RegSet_StackPointer; 4774 RegSetMask RegExclude = RegSet_StackPointer;
4866 if (hasFramePointer()) 4775 if (hasFramePointer())
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
4997 _br(Succ); 4906 _br(Succ);
4998 } 4907 }
4999 4908
5000 // There is no support for loading or emitting vector constants, so the 4909 // There is no support for loading or emitting vector constants, so the
5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4910 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
5002 // etc. are initialized with register operations. 4911 // etc. are initialized with register operations.
5003 // 4912 //
5004 // TODO(wala): Add limited support for vector constants so that 4913 // TODO(wala): Add limited support for vector constants so that
5005 // complex initialization in registers is unnecessary. 4914 // complex initialization in registers is unnecessary.
5006 4915
5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { 4916 template <class Machine>
4917 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
5008 Variable *Reg = makeReg(Ty, RegNum); 4918 Variable *Reg = makeReg(Ty, RegNum);
5009 // Insert a FakeDef, since otherwise the live range of Reg might 4919 // Insert a FakeDef, since otherwise the live range of Reg might
5010 // be overestimated. 4920 // be overestimated.
5011 Context.insert(InstFakeDef::create(Func, Reg)); 4921 Context.insert(InstFakeDef::create(Func, Reg));
5012 _pxor(Reg, Reg); 4922 _pxor(Reg, Reg);
5013 return Reg; 4923 return Reg;
5014 } 4924 }
5015 4925
5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { 4926 template <class Machine>
4927 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
4928 int32_t RegNum) {
5017 Variable *MinusOnes = makeReg(Ty, RegNum); 4929 Variable *MinusOnes = makeReg(Ty, RegNum);
5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated. 4930 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
5019 Context.insert(InstFakeDef::create(Func, MinusOnes)); 4931 Context.insert(InstFakeDef::create(Func, MinusOnes));
5020 _pcmpeq(MinusOnes, MinusOnes); 4932 _pcmpeq(MinusOnes, MinusOnes);
5021 return MinusOnes; 4933 return MinusOnes;
5022 } 4934 }
5023 4935
5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { 4936 template <class Machine>
4937 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 4938 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 4939 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
5027 _psub(Dest, MinusOne); 4940 _psub(Dest, MinusOne);
5028 return Dest; 4941 return Dest;
5029 } 4942 }
5030 4943
5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4944 template <class Machine>
4945 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
4946 int32_t RegNum) {
5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4947 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5033 Ty == IceType_v16i8); 4948 Ty == IceType_v16i8);
5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4949 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4950 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4951 SizeT Shift =
4952 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5037 _psll(Reg, Ctx->getConstantInt8(Shift)); 4953 _psll(Reg, Ctx->getConstantInt8(Shift));
5038 return Reg; 4954 return Reg;
5039 } else { 4955 } else {
5040 // SSE has no left shift operation for vectors of 8 bit integers. 4956 // SSE has no left shift operation for vectors of 8 bit integers.
5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4957 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 4958 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5043 Variable *Reg = makeReg(Ty, RegNum); 4959 Variable *Reg = makeReg(Ty, RegNum);
5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4960 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4961 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5046 return Reg; 4962 return Reg;
5047 } 4963 }
5048 } 4964 }
5049 4965
5050 // Construct a mask in a register that can be and'ed with a 4966 // Construct a mask in a register that can be and'ed with a
5051 // floating-point value to mask off its sign bit. The value will be 4967 // floating-point value to mask off its sign bit. The value will be
5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> 4968 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
5053 // for f64. Construct it as vector of ones logically right shifted 4969 // for f64. Construct it as vector of ones logically right shifted
5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent 4970 // one bit. TODO(stichnot): Fix the wala TODO above, to represent
5055 // vector constants in memory. 4971 // vector constants in memory.
5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { 4972 template <class Machine>
4973 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4974 int32_t RegNum) {
5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4975 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
5058 _psrl(Reg, Ctx->getConstantInt8(1)); 4976 _psrl(Reg, Ctx->getConstantInt8(1));
5059 return Reg; 4977 return Reg;
5060 } 4978 }
5061 4979
5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4980 template <class Machine>
5063 Variable *Slot, 4981 OperandX8632Mem *
5064 uint32_t Offset) { 4982 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4983 uint32_t Offset) {
5065 // Ensure that Slot is a stack slot. 4984 // Ensure that Slot is a stack slot.
5066 assert(Slot->getWeight().isZero()); 4985 assert(Slot->getWeight().isZero());
5067 assert(Slot->getRegNum() == Variable::NoRegister); 4986 assert(Slot->getRegNum() == Variable::NoRegister);
5068 // Compute the location of Slot in memory. 4987 // Compute the location of Slot in memory.
5069 // TODO(wala,stichnot): lea should not be required. The address of 4988 // TODO(wala,stichnot): lea should not be required. The address of
5070 // the stack slot is known at compile time (although not until after 4989 // the stack slot is known at compile time (although not until after
5071 // addProlog()). 4990 // addProlog()).
5072 const Type PointerType = IceType_i32; 4991 const Type PointerType = IceType_i32;
5073 Variable *Loc = makeReg(PointerType); 4992 Variable *Loc = makeReg(PointerType);
5074 _lea(Loc, Slot); 4993 _lea(Loc, Slot);
5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4994 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4995 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
5077 } 4996 }
5078 4997
5079 // Helper for legalize() to emit the right code to lower an operand to a 4998 // Helper for legalize() to emit the right code to lower an operand to a
5080 // register of the appropriate type. 4999 // register of the appropriate type.
5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 5000 template <class Machine>
5001 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5082 Type Ty = Src->getType(); 5002 Type Ty = Src->getType();
5083 Variable *Reg = makeReg(Ty, RegNum); 5003 Variable *Reg = makeReg(Ty, RegNum);
5084 if (isVectorType(Ty)) { 5004 if (isVectorType(Ty)) {
5085 _movp(Reg, Src); 5005 _movp(Reg, Src);
5086 } else { 5006 } else {
5087 _mov(Reg, Src); 5007 _mov(Reg, Src);
5088 } 5008 }
5089 return Reg; 5009 return Reg;
5090 } 5010 }
5091 5011
5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, 5012 template <class Machine>
5093 int32_t RegNum) { 5013 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
5014 int32_t RegNum) {
5094 Type Ty = From->getType(); 5015 Type Ty = From->getType();
5095 // Assert that a physical register is allowed. To date, all calls 5016 // Assert that a physical register is allowed. To date, all calls
5096 // to legalize() allow a physical register. If a physical register 5017 // to legalize() allow a physical register. If a physical register
5097 // needs to be explicitly disallowed, then new code will need to be 5018 // needs to be explicitly disallowed, then new code will need to be
5098 // written to force a spill. 5019 // written to force a spill.
5099 assert(Allowed & Legal_Reg); 5020 assert(Allowed & Legal_Reg);
5100 // If we're asking for a specific physical register, make sure we're 5021 // If we're asking for a specific physical register, make sure we're
5101 // not allowing any other operand kinds. (This could be future 5022 // not allowing any other operand kinds. (This could be future
5102 // work, e.g. allow the shl shift amount to be either an immediate 5023 // work, e.g. allow the shl shift amount to be either an immediate
5103 // or in ecx.) 5024 // or in ecx.)
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5117 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
5197 From = copyToReg(From, RegNum); 5118 From = copyToReg(From, RegNum);
5198 } 5119 }
5199 return From; 5120 return From;
5200 } 5121 }
5201 llvm_unreachable("Unhandled operand kind in legalize()"); 5122 llvm_unreachable("Unhandled operand kind in legalize()");
5202 return From; 5123 return From;
5203 } 5124 }
5204 5125
5205 // Provide a trivial wrapper to legalize() for this common usage. 5126 // Provide a trivial wrapper to legalize() for this common usage.
5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { 5127 template <class Machine>
5128 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 5129 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
5208 } 5130 }
5209 5131
5210 // For the cmp instruction, if Src1 is an immediate, or known to be a 5132 // For the cmp instruction, if Src1 is an immediate, or known to be a
5211 // physical register, we can allow Src0 to be a memory operand. 5133 // physical register, we can allow Src0 to be a memory operand.
5212 // Otherwise, Src0 must be copied into a physical register. 5134 // Otherwise, Src0 must be copied into a physical register.
5213 // (Actually, either Src0 or Src1 can be chosen for the physical 5135 // (Actually, either Src0 or Src1 can be chosen for the physical
5214 // register, but unfortunately we have to commit to one or the other 5136 // register, but unfortunately we have to commit to one or the other
5215 // before register allocation.) 5137 // before register allocation.)
5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { 5138 template <class Machine>
5139 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
5140 Operand *Src1) {
5217 bool IsSrc1ImmOrReg = false; 5141 bool IsSrc1ImmOrReg = false;
5218 if (llvm::isa<Constant>(Src1)) { 5142 if (llvm::isa<Constant>(Src1)) {
5219 IsSrc1ImmOrReg = true; 5143 IsSrc1ImmOrReg = true;
5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5144 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5221 if (Var->hasReg()) 5145 if (Var->hasReg())
5222 IsSrc1ImmOrReg = true; 5146 IsSrc1ImmOrReg = true;
5223 } 5147 }
5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 5148 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5225 } 5149 }
5226 5150
5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty, 5151 template <class Machine>
5228 bool DoLegalize) { 5152 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd,
5153 Type Ty,
5154 bool DoLegalize) {
5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); 5155 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
5230 // It may be the case that address mode optimization already creates 5156 // It may be the case that address mode optimization already creates
5231 // an OperandX8632Mem, so in that case it wouldn't need another level 5157 // an OperandX8632Mem, so in that case it wouldn't need another level
5232 // of transformation. 5158 // of transformation.
5233 if (!Mem) { 5159 if (!Mem) {
5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd); 5160 Variable *Base = llvm::dyn_cast<Variable>(Opnd);
5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); 5161 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
5236 assert(Base || Offset); 5162 assert(Base || Offset);
5237 if (Offset) { 5163 if (Offset) {
5238 // During memory operand building, we do not blind or pool 5164 // During memory operand building, we do not blind or pool
(...skipping 11 matching lines...) Expand all
5250 llvm::isa<ConstantRelocatable>(Offset)); 5176 llvm::isa<ConstantRelocatable>(Offset));
5251 } 5177 }
5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 5178 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
5253 } 5179 }
5254 // Either do full legalization, which includes randomization/pooling, 5180 // Either do full legalization, which includes randomization/pooling,
5255 // or do only randomization/pooling. 5181 // or do only randomization/pooling.
5256 return llvm::cast<OperandX8632Mem>( 5182 return llvm::cast<OperandX8632Mem>(
5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 5183 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
5258 } 5184 }
5259 5185
5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { 5186 template <class Machine>
5187 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
5261 // There aren't any 64-bit integer registers for x86-32. 5188 // There aren't any 64-bit integer registers for x86-32.
5262 assert(Type != IceType_i64); 5189 assert(Type != IceType_i64);
5263 Variable *Reg = Func->makeVariable(Type); 5190 Variable *Reg = Func->makeVariable(Type);
5264 if (RegNum == Variable::NoRegister) 5191 if (RegNum == Variable::NoRegister)
5265 Reg->setWeightInfinite(); 5192 Reg->setWeightInfinite();
5266 else 5193 else
5267 Reg->setRegNum(RegNum); 5194 Reg->setRegNum(RegNum);
5268 return Reg; 5195 return Reg;
5269 } 5196 }
5270 5197
5271 void TargetX8632::postLower() { 5198 template <class Machine> void TargetX86Base<Machine>::postLower() {
5272 if (Ctx->getFlags().getOptLevel() == Opt_m1) 5199 if (Ctx->getFlags().getOptLevel() == Opt_m1)
5273 return; 5200 return;
5274 inferTwoAddress(); 5201 inferTwoAddress();
5275 } 5202 }
5276 5203
5277 void TargetX8632::makeRandomRegisterPermutation( 5204 template <class Machine>
5205 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5278 llvm::SmallVectorImpl<int32_t> &Permutation, 5206 llvm::SmallVectorImpl<int32_t> &Permutation,
5279 const llvm::SmallBitVector &ExcludeRegisters) const { 5207 const llvm::SmallBitVector &ExcludeRegisters) const {
5280 // TODO(stichnot): Declaring Permutation this way loses type/size 5208 // TODO(stichnot): Declaring Permutation this way loses type/size
5281 // information. Fix this in conjunction with the caller-side TODO. 5209 // information. Fix this in conjunction with the caller-side TODO.
5282 assert(Permutation.size() >= RegX8632::Reg_NUM); 5210 assert(Permutation.size() >= RegX8632::Reg_NUM);
5283 // Expected upper bound on the number of registers in a single 5211 // Expected upper bound on the number of registers in a single
5284 // equivalence class. For x86-32, this would comprise the 8 XMM 5212 // equivalence class. For x86-32, this would comprise the 8 XMM
5285 // registers. This is for performance, not correctness. 5213 // registers. This is for performance, not correctness.
5286 static const unsigned MaxEquivalenceClassSize = 8; 5214 static const unsigned MaxEquivalenceClassSize = 8;
5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; 5215 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
5334 if (!First) 5262 if (!First)
5335 Str << " "; 5263 Str << " ";
5336 First = false; 5264 First = false;
5337 Str << getRegName(Register, IceType_i32); 5265 Str << getRegName(Register, IceType_i32);
5338 } 5266 }
5339 Str << "}\n"; 5267 Str << "}\n";
5340 } 5268 }
5341 } 5269 }
5342 } 5270 }
5343 5271
5344 void TargetX8632::emit(const ConstantInteger32 *C) const { 5272 template <class Machine>
5273 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
5345 if (!ALLOW_DUMP) 5274 if (!ALLOW_DUMP)
5346 return; 5275 return;
5347 Ostream &Str = Ctx->getStrEmit(); 5276 Ostream &Str = Ctx->getStrEmit();
5348 Str << getConstantPrefix() << C->getValue(); 5277 Str << getConstantPrefix() << C->getValue();
5349 } 5278 }
5350 5279
5351 void TargetX8632::emit(const ConstantInteger64 *) const { 5280 template <class Machine>
5281 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 5282 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
5353 } 5283 }
5354 5284
5355 void TargetX8632::emit(const ConstantFloat *C) const { 5285 template <class Machine>
5286 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
5356 if (!ALLOW_DUMP) 5287 if (!ALLOW_DUMP)
5357 return; 5288 return;
5358 Ostream &Str = Ctx->getStrEmit(); 5289 Ostream &Str = Ctx->getStrEmit();
5359 C->emitPoolLabel(Str); 5290 C->emitPoolLabel(Str);
5360 } 5291 }
5361 5292
5362 void TargetX8632::emit(const ConstantDouble *C) const { 5293 template <class Machine>
5294 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
5363 if (!ALLOW_DUMP) 5295 if (!ALLOW_DUMP)
5364 return; 5296 return;
5365 Ostream &Str = Ctx->getStrEmit(); 5297 Ostream &Str = Ctx->getStrEmit();
5366 C->emitPoolLabel(Str); 5298 C->emitPoolLabel(Str);
5367 } 5299 }
5368 5300
5369 void TargetX8632::emit(const ConstantUndef *) const { 5301 template <class Machine>
5302 void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
5370 llvm::report_fatal_error("undef value encountered by emitter."); 5303 llvm::report_fatal_error("undef value encountered by emitter.");
5371 } 5304 }
5372 5305
5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
5374 : TargetDataLowering(Ctx) {}
5375
5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
5377 const IceString &SectionSuffix) {
5378 switch (Ctx->getFlags().getOutFileType()) {
5379 case FT_Elf: {
5380 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
5382 } break;
5383 case FT_Asm:
5384 case FT_Iasm: {
5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
5386 OstreamLocker L(Ctx);
5387 for (const VariableDeclaration *Var : Vars) {
5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
5389 emitGlobal(*Var, SectionSuffix);
5390 }
5391 }
5392 } break;
5393 }
5394 }
5395
5396 template <typename T> struct PoolTypeConverter {};
5397
5398 template <> struct PoolTypeConverter<float> {
5399 typedef uint32_t PrimitiveIntType;
5400 typedef ConstantFloat IceType;
5401 static const Type Ty = IceType_f32;
5402 static const char *TypeName;
5403 static const char *AsmTag;
5404 static const char *PrintfString;
5405 };
5406 const char *PoolTypeConverter<float>::TypeName = "float";
5407 const char *PoolTypeConverter<float>::AsmTag = ".long";
5408 const char *PoolTypeConverter<float>::PrintfString = "0x%x";
5409
5410 template <> struct PoolTypeConverter<double> {
5411 typedef uint64_t PrimitiveIntType;
5412 typedef ConstantDouble IceType;
5413 static const Type Ty = IceType_f64;
5414 static const char *TypeName;
5415 static const char *AsmTag;
5416 static const char *PrintfString;
5417 };
5418 const char *PoolTypeConverter<double>::TypeName = "double";
5419 const char *PoolTypeConverter<double>::AsmTag = ".quad";
5420 const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
5421
5422 // Add converter for int type constant pooling
5423 template <> struct PoolTypeConverter<uint32_t> {
5424 typedef uint32_t PrimitiveIntType;
5425 typedef ConstantInteger32 IceType;
5426 static const Type Ty = IceType_i32;
5427 static const char *TypeName;
5428 static const char *AsmTag;
5429 static const char *PrintfString;
5430 };
5431 const char *PoolTypeConverter<uint32_t>::TypeName = "i32";
5432 const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";
5433 const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";
5434
5435 // Add converter for int type constant pooling
5436 template <> struct PoolTypeConverter<uint16_t> {
5437 typedef uint32_t PrimitiveIntType;
5438 typedef ConstantInteger32 IceType;
5439 static const Type Ty = IceType_i16;
5440 static const char *TypeName;
5441 static const char *AsmTag;
5442 static const char *PrintfString;
5443 };
5444 const char *PoolTypeConverter<uint16_t>::TypeName = "i16";
5445 const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";
5446 const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";
5447
5448 // Add converter for int type constant pooling
5449 template <> struct PoolTypeConverter<uint8_t> {
5450 typedef uint32_t PrimitiveIntType;
5451 typedef ConstantInteger32 IceType;
5452 static const Type Ty = IceType_i8;
5453 static const char *TypeName;
5454 static const char *AsmTag;
5455 static const char *PrintfString;
5456 };
5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
5460
5461 template <typename T>
5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
5463 if (!ALLOW_DUMP)
5464 return;
5465 Ostream &Str = Ctx->getStrEmit();
5466 Type Ty = T::Ty;
5467 SizeT Align = typeAlignInBytes(Ty);
5468 ConstantList Pool = Ctx->getConstantPool(Ty);
5469
5470 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
5471 << "\n";
5472 Str << "\t.align\t" << Align << "\n";
5473 for (Constant *C : Pool) {
5474 if (!C->getShouldBePooled())
5475 continue;
5476 typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
5477 typename T::IceType::PrimType Value = Const->getValue();
5478 // Use memcpy() to copy bits from Value into RawValue in a way
5479 // that avoids breaking strict-aliasing rules.
5480 typename T::PrimitiveIntType RawValue;
5481 memcpy(&RawValue, &Value, sizeof(Value));
5482 char buf[30];
5483 int CharsPrinted =
5484 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
5485 assert(CharsPrinted >= 0 &&
5486 (size_t)CharsPrinted < llvm::array_lengthof(buf));
5487 (void)CharsPrinted; // avoid warnings if asserts are disabled
5488 Const->emitPoolLabel(Str);
5489 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
5490 << Value << "\n";
5491 }
5492 }
5493
5494 void TargetDataX8632::lowerConstants() {
5495 if (Ctx->getFlags().getDisableTranslation())
5496 return;
5497 // No need to emit constants from the int pool since (for x86) they
5498 // are embedded as immediates in the instructions, just emit float/double.
5499 switch (Ctx->getFlags().getOutFileType()) {
5500 case FT_Elf: {
5501 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5502
5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
5506
5507 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5509 } break;
5510 case FT_Asm:
5511 case FT_Iasm: {
5512 OstreamLocker L(Ctx);
5513
5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
5517
5518 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5519 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5520 } break;
5521 }
5522 }
5523
5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5525 : TargetHeaderLowering(Ctx) {}
5526
5527 // Randomize or pool an Immediate. 5306 // Randomize or pool an Immediate.
5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate, 5307 template <class Machine>
5529 int32_t RegNum) { 5308 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
5309 int32_t RegNum) {
5530 assert(llvm::isa<ConstantInteger32>(Immediate) || 5310 assert(llvm::isa<ConstantInteger32>(Immediate) ||
5531 llvm::isa<ConstantRelocatable>(Immediate)); 5311 llvm::isa<ConstantRelocatable>(Immediate));
5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5312 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5533 RandomizationPoolingPaused == true) { 5313 RandomizationPoolingPaused == true) {
5534 // Immediates randomization/pooling off or paused 5314 // Immediates randomization/pooling off or paused
5535 return Immediate; 5315 return Immediate;
5536 } 5316 }
5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { 5317 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
5538 Ctx->statsUpdateRPImms(); 5318 Ctx->statsUpdateRPImms();
5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5319 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); 5375 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
5596 _mov(Reg, MemOperand); 5376 _mov(Reg, MemOperand);
5597 return Reg; 5377 return Reg;
5598 } 5378 }
5599 assert("Unsupported -randomize-pool-immediates option" && false); 5379 assert("Unsupported -randomize-pool-immediates option" && false);
5600 } 5380 }
5601 // the constant Immediate is not eligible for blinding/pooling 5381 // the constant Immediate is not eligible for blinding/pooling
5602 return Immediate; 5382 return Immediate;
5603 } 5383 }
5604 5384
5385 template <class Machine>
5605 OperandX8632Mem * 5386 OperandX8632Mem *
5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, 5387 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
5607 int32_t RegNum) { 5388 int32_t RegNum) {
5608 assert(MemOperand); 5389 assert(MemOperand);
5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5390 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5610 RandomizationPoolingPaused == true) { 5391 RandomizationPoolingPaused == true) {
5611 // Immediates randomization/pooling is off or paused 5392 // Immediates randomization/pooling is off or paused
5612 return MemOperand; 5393 return MemOperand;
5613 } 5394 }
5614 5395
5615 // If this memory operand is already a randomized one, we do 5396 // If this memory operand is already a randomized one, we do
5616 // not randomize it again. 5397 // not randomize it again.
5617 if (MemOperand->getRandomized()) 5398 if (MemOperand->getRandomized())
5618 return MemOperand; 5399 return MemOperand;
5619 5400
5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { 5401 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
5621 if (C->shouldBeRandomizedOrPooled(Ctx)) { 5402 if (C->shouldBeRandomizedOrPooled(Ctx)) {
5622 // The offset of this mem operand should be blinded or pooled 5403 // The offset of this mem operand should be blinded or pooled
5623 Ctx->statsUpdateRPImms(); 5404 Ctx->statsUpdateRPImms();
5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5405 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5625 RPI_Randomize) { 5406 RPI_Randomize) {
5626 // blind the constant offset 5407 // blind the constant offset
5627 // FROM: 5408 // FROM:
5628 // offset[base, index, shift] 5409 // offset[base, index, shift]
5629 // TO: 5410 // TO:
5630 // insert: lea offset+cookie[base], RegTemp 5411 // insert: lea offset+cookie[base], RegTemp
5631 // => -cookie[RegTemp, index, shift] 5412 // => -cookie[RegTemp, index, shift]
5632 uint32_t Value = 5413 uint32_t Value = llvm::dyn_cast<ConstantInteger32>(
5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) 5414 MemOperand->getOffset())->getValue();
5634 ->getValue();
5635 uint32_t Cookie = Ctx->getRandomizationCookie(); 5415 uint32_t Cookie = Ctx->getRandomizationCookie();
5636 Constant *Mask1 = Ctx->getConstantInt( 5416 Constant *Mask1 = Ctx->getConstantInt(
5637 MemOperand->getOffset()->getType(), Cookie + Value); 5417 MemOperand->getOffset()->getType(), Cookie + Value);
5638 Constant *Mask2 = 5418 Constant *Mask2 =
5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5419 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
5640 5420
5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( 5421 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); 5422 Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
5643 // If we have already assigned a physical register, we must come from 5423 // If we have already assigned a physical register, we must come from
5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5424 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
5710 return NewMemOperand; 5490 return NewMemOperand;
5711 } 5491 }
5712 assert("Unsupported -randomize-pool-immediates option" && false); 5492 assert("Unsupported -randomize-pool-immediates option" && false);
5713 } 5493 }
5714 } 5494 }
5715 // the offset is not eligible for blinding or pooling, return the original 5495 // the offset is not eligible for blinding or pooling, return the original
5716 // mem operand 5496 // mem operand
5717 return MemOperand; 5497 return MemOperand;
5718 } 5498 }
5719 5499
5500 } // end of namespace X86Internal
5720 } // end of namespace Ice 5501 } // end of namespace Ice
5502
5503 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698