Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(152)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1202533003: Extracts a TargetX86Base target which will be used as the common X86{32,64} implementation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« src/IceInst.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering ----------===//
Jim Stichnoth 2015/06/22 21:52:02 add the "-*- C++ -*-" stuff
John 2015/06/22 22:09:23 Done, but why? Also, this C++ thingy adds no real
Jim Stichnoth 2015/06/22 23:04:05 This is an emacs thing, so that it invokes c++-mod
John 2015/06/22 23:09:55 Use vim instead? :) Seriously, though, it is puzz
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX86Base class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. 12 // high-level instruction.
13 // 13 //
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
Jim Stichnoth 2015/06/22 21:52:02 Should there be an include guard?
John 2015/06/22 22:09:23 It doesn't hurt, but Impl files should not be incl
Jim Stichnoth 2015/06/22 23:04:05 OK, I see.
16 #include "llvm/Support/MathExtras.h" 16 #include "llvm/Support/MathExtras.h"
17 17
18 #include "IceCfg.h" 18 #include "IceCfg.h"
19 #include "IceCfgNode.h" 19 #include "IceCfgNode.h"
20 #include "IceClFlags.h" 20 #include "IceClFlags.h"
21 #include "IceDefs.h" 21 #include "IceDefs.h"
22 #include "IceELFObjectWriter.h" 22 #include "IceELFObjectWriter.h"
23 #include "IceGlobalInits.h" 23 #include "IceGlobalInits.h"
24 #include "IceInstX8632.h" 24 #include "IceInstX8632.h"
25 #include "IceLiveness.h" 25 #include "IceLiveness.h"
26 #include "IceOperand.h" 26 #include "IceOperand.h"
27 #include "IceRegistersX8632.h" 27 #include "IceRegistersX8632.h"
28 #include "IceTargetLoweringX8632.def" 28 #include "IceTargetLoweringX8632.def"
29 #include "IceTargetLoweringX8632.h" 29 #include "IceTargetLoweringX8632.h"
30 #include "IceUtils.h" 30 #include "IceUtils.h"
31 31
32 namespace Ice { 32 namespace Ice {
33 33 namespace X86Internal {
34 namespace {
35
36 // The following table summarizes the logic for lowering the fcmp
37 // instruction. There is one table entry for each of the 16 conditions.
38 //
39 // The first four columns describe the case when the operands are
40 // floating point scalar values. A comment in lowerFcmp() describes the
41 // lowering template. In the most general case, there is a compare
42 // followed by two conditional branches, because some fcmp conditions
43 // don't map to a single x86 conditional branch. However, in many cases
44 // it is possible to swap the operands in the comparison and have a
45 // single conditional branch. Since it's quite tedious to validate the
46 // table by hand, good execution tests are helpful.
47 //
48 // The last two columns describe the case when the operands are vectors
49 // of floating point values. For most fcmp conditions, there is a clear
50 // mapping to a single x86 cmpps instruction variant. Some fcmp
51 // conditions require special code to handle and these are marked in the
52 // table with a Cmpps_Invalid predicate.
53 const struct TableFcmp_ {
54 uint32_t Default;
55 bool SwapScalarOperands;
56 CondX86::BrCond C1, C2;
57 bool SwapVectorOperands;
58 CondX86::CmppsCond Predicate;
59 } TableFcmp[] = {
60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
62 ,
63 FCMPX8632_TABLE
64 #undef X
65 };
66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
67
68 // The following table summarizes the logic for lowering the icmp instruction
69 // for i32 and narrower types. Each icmp condition has a clear mapping to an
70 // x86 conditional branch instruction.
71
72 const struct TableIcmp32_ {
73 CondX86::BrCond Mapping;
74 } TableIcmp32[] = {
75 #define X(val, C_32, C1_64, C2_64, C3_64) \
76 { CondX86::C_32 } \
77 ,
78 ICMPX8632_TABLE
79 #undef X
80 };
81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
82
83 // The following table summarizes the logic for lowering the icmp instruction
84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
85 // conditional branches are needed. For the other conditions, three separate
86 // conditional branches are needed.
87 const struct TableIcmp64_ {
88 CondX86::BrCond C1, C2, C3;
89 } TableIcmp64[] = {
90 #define X(val, C_32, C1_64, C2_64, C3_64) \
91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
92 ,
93 ICMPX8632_TABLE
94 #undef X
95 };
96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
97
98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
99 size_t Index = static_cast<size_t>(Cond);
100 assert(Index < TableIcmp32Size);
101 return TableIcmp32[Index].Mapping;
102 }
103
104 const struct TableTypeX8632Attributes_ {
105 Type InVectorElementType;
106 } TableTypeX8632Attributes[] = {
107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \
108 { elementty } \
109 ,
110 ICETYPEX8632_TABLE
111 #undef X
112 };
113 const size_t TableTypeX8632AttributesSize =
114 llvm::array_lengthof(TableTypeX8632Attributes);
115
116 // Return the type which the elements of the vector have in the X86
117 // representation of the vector.
118 Type getInVectorElementType(Type Ty) {
119 assert(isVectorType(Ty));
120 size_t Index = static_cast<size_t>(Ty);
121 (void)Index;
122 assert(Index < TableTypeX8632AttributesSize);
123 return TableTypeX8632Attributes[Ty].InVectorElementType;
124 }
125
126 // The maximum number of arguments to pass in XMM registers
127 const uint32_t X86_MAX_XMM_ARGS = 4;
128 // The number of bits in a byte
129 const uint32_t X86_CHAR_BIT = 8;
130 // Stack alignment
131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
132 // Size of the return address on the stack
133 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
134 // The number of different NOP instructions
135 const uint32_t X86_NUM_NOP_VARIANTS = 5;
136
137 // Value is in bytes. Return Value adjusted to the next highest multiple
138 // of the stack alignment.
139 uint32_t applyStackAlignment(uint32_t Value) {
140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
141 }
142
143 // In some cases, there are x-macros tables for both high-level and
144 // low-level instructions/operands that use the same enum key value.
145 // The tables are kept separate to maintain a proper separation
146 // between abstraction layers. There is a risk that the tables could
147 // get out of sync if enum values are reordered or if entries are
148 // added or deleted. The following dummy namespaces use
149 // static_asserts to ensure everything is kept in sync.
150
151 // Validate the enum values in FCMPX8632_TABLE.
152 namespace dummy1 {
153 // Define a temporary set of enum values based on low-level table
154 // entries.
155 enum _tmp_enum {
156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
157 FCMPX8632_TABLE
158 #undef X
159 _num
160 };
161 // Define a set of constants based on high-level table entries.
162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
163 ICEINSTFCMP_TABLE
164 #undef X
165 // Define a set of constants based on low-level table entries, and
166 // ensure the table entry keys are consistent.
167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
168 static const int _table2_##val = _tmp_##val; \
169 static_assert( \
170 _table1_##val == _table2_##val, \
171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
172 FCMPX8632_TABLE
173 #undef X
174 // Repeat the static asserts with respect to the high-level table
175 // entries in case the high-level table has extra entries.
176 #define X(tag, str) \
177 static_assert( \
178 _table1_##tag == _table2_##tag, \
179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
180 ICEINSTFCMP_TABLE
181 #undef X
182 } // end of namespace dummy1
183
184 // Validate the enum values in ICMPX8632_TABLE.
185 namespace dummy2 {
186 // Define a temporary set of enum values based on low-level table
187 // entries.
188 enum _tmp_enum {
189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
190 ICMPX8632_TABLE
191 #undef X
192 _num
193 };
194 // Define a set of constants based on high-level table entries.
195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
196 ICEINSTICMP_TABLE
197 #undef X
198 // Define a set of constants based on low-level table entries, and
199 // ensure the table entry keys are consistent.
200 #define X(val, C_32, C1_64, C2_64, C3_64) \
201 static const int _table2_##val = _tmp_##val; \
202 static_assert( \
203 _table1_##val == _table2_##val, \
204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
205 ICMPX8632_TABLE
206 #undef X
207 // Repeat the static asserts with respect to the high-level table
208 // entries in case the high-level table has extra entries.
209 #define X(tag, str) \
210 static_assert( \
211 _table1_##tag == _table2_##tag, \
212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
213 ICEINSTICMP_TABLE
214 #undef X
215 } // end of namespace dummy2
216
217 // Validate the enum values in ICETYPEX8632_TABLE.
218 namespace dummy3 {
219 // Define a temporary set of enum values based on low-level table
220 // entries.
221 enum _tmp_enum {
222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
223 ICETYPEX8632_TABLE
224 #undef X
225 _num
226 };
227 // Define a set of constants based on high-level table entries.
228 #define X(tag, size, align, elts, elty, str) \
229 static const int _table1_##tag = tag;
230 ICETYPE_TABLE
231 #undef X
232 // Define a set of constants based on low-level table entries, and
233 // ensure the table entry keys are consistent.
234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \
235 static const int _table2_##tag = _tmp_##tag; \
236 static_assert(_table1_##tag == _table2_##tag, \
237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
238 ICETYPEX8632_TABLE
239 #undef X
240 // Repeat the static asserts with respect to the high-level table
241 // entries in case the high-level table has extra entries.
242 #define X(tag, size, align, elts, elty, str) \
243 static_assert(_table1_##tag == _table2_##tag, \
244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
245 ICETYPE_TABLE
246 #undef X
247 } // end of namespace dummy3
248 34
249 // A helper class to ease the settings of RandomizationPoolingPause 35 // A helper class to ease the settings of RandomizationPoolingPause
250 // to disable constant blinding or pooling for some translation phases. 36 // to disable constant blinding or pooling for some translation phases.
251 class BoolFlagSaver { 37 class BoolFlagSaver {
252 BoolFlagSaver() = delete; 38 BoolFlagSaver() = delete;
253 BoolFlagSaver(const BoolFlagSaver &) = delete; 39 BoolFlagSaver(const BoolFlagSaver &) = delete;
254 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 40 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
255 41
256 public: 42 public:
257 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } 43 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
258 ~BoolFlagSaver() { Flag = OldValue; } 44 ~BoolFlagSaver() { Flag = OldValue; }
259 45
260 private: 46 private:
261 const bool OldValue; 47 const bool OldValue;
262 bool &Flag; 48 bool &Flag;
263 }; 49 };
264 50
265 } // end of anonymous namespace 51 template <class MachineTraits> class BoolFoldingEntry {
52 BoolFoldingEntry(const BoolFoldingEntry &) = delete;
266 53
267 BoolFoldingEntry::BoolFoldingEntry(Inst *I) 54 public:
268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {} 55 BoolFoldingEntry() = default;
56 explicit BoolFoldingEntry(Inst *I);
57 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
58 // Instr is the instruction producing the i1-type variable of interest.
59 Inst *Instr = nullptr;
60 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
61 bool IsComplex = false;
62 // IsLiveOut is initialized conservatively to true, and is set to false when
63 // we encounter an instruction that ends Var's live range. We disable the
64 // folding optimization when Var is live beyond this basic block. Note that
65 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
66 // always be true and the folding optimization will never be performed.
67 bool IsLiveOut = true;
68 // NumUses counts the number of times Var is used as a source operand in the
69 // basic block. If IsComplex is true and there is more than one use of Var,
70 // then the folding optimization is disabled for Var.
71 uint32_t NumUses = 0;
72 };
269 73
270 BoolFolding::BoolFoldingProducerKind 74 template <class MachineTraits> class BoolFolding {
271 BoolFolding::getProducerKind(const Inst *Instr) { 75 public:
76 enum BoolFoldingProducerKind {
77 PK_None,
78 PK_Icmp32,
79 PK_Icmp64,
80 PK_Fcmp,
81 PK_Trunc
82 };
83
84 // Currently the actual enum values are not used (other than CK_None), but we
85 // go
86 // ahead and produce them anyway for symmetry with the
87 // BoolFoldingProducerKind.
88 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
89
90 private:
91 BoolFolding(const BoolFolding &) = delete;
92 BoolFolding &operator=(const BoolFolding &) = delete;
93
94 public:
95 BoolFolding() = default;
96 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
97 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
98 static bool hasComplexLowering(const Inst *Instr);
99 void init(CfgNode *Node);
100 const Inst *getProducerFor(const Operand *Opnd) const;
101 void dump(const Cfg *Func) const;
102
103 private:
104 // Returns true if Producers contains a valid entry for the given VarNum.
105 bool containsValid(SizeT VarNum) const {
106 auto Element = Producers.find(VarNum);
107 return Element != Producers.end() && Element->second.Instr != nullptr;
108 }
109 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
110 // Producers maps Variable::Number to a BoolFoldingEntry.
111 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
112 };
113
114 template <class MachineTraits>
115 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
116 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
117
118 template <class MachineTraits>
119 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
120 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
272 if (llvm::isa<InstIcmp>(Instr)) { 121 if (llvm::isa<InstIcmp>(Instr)) {
273 if (Instr->getSrc(0)->getType() != IceType_i64) 122 if (Instr->getSrc(0)->getType() != IceType_i64)
274 return PK_Icmp32; 123 return PK_Icmp32;
275 return PK_None; // TODO(stichnot): actually PK_Icmp64; 124 return PK_None; // TODO(stichnot): actually PK_Icmp64;
276 } 125 }
277 return PK_None; // TODO(stichnot): remove this 126 return PK_None; // TODO(stichnot): remove this
278 127
279 if (llvm::isa<InstFcmp>(Instr)) 128 if (llvm::isa<InstFcmp>(Instr))
280 return PK_Fcmp; 129 return PK_Fcmp;
281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 130 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
282 switch (Cast->getCastKind()) { 131 switch (Cast->getCastKind()) {
283 default: 132 default:
284 return PK_None; 133 return PK_None;
285 case InstCast::Trunc: 134 case InstCast::Trunc:
286 return PK_Trunc; 135 return PK_Trunc;
287 } 136 }
288 } 137 }
289 return PK_None; 138 return PK_None;
290 } 139 }
291 140
292 BoolFolding::BoolFoldingConsumerKind 141 template <class MachineTraits>
293 BoolFolding::getConsumerKind(const Inst *Instr) { 142 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
143 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
294 if (llvm::isa<InstBr>(Instr)) 144 if (llvm::isa<InstBr>(Instr))
295 return CK_Br; 145 return CK_Br;
296 if (llvm::isa<InstSelect>(Instr)) 146 if (llvm::isa<InstSelect>(Instr))
297 return CK_Select; 147 return CK_Select;
298 return CK_None; // TODO(stichnot): remove this 148 return CK_None; // TODO(stichnot): remove this
299 149
300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 150 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
301 switch (Cast->getCastKind()) { 151 switch (Cast->getCastKind()) {
302 default: 152 default:
303 return CK_None; 153 return CK_None;
304 case InstCast::Sext: 154 case InstCast::Sext:
305 return CK_Sext; 155 return CK_Sext;
306 case InstCast::Zext: 156 case InstCast::Zext:
307 return CK_Zext; 157 return CK_Zext;
308 } 158 }
309 } 159 }
310 return CK_None; 160 return CK_None;
311 } 161 }
312 162
313 // Returns true if the producing instruction has a "complex" lowering 163 // Returns true if the producing instruction has a "complex" lowering
314 // sequence. This generally means that its lowering sequence requires 164 // sequence. This generally means that its lowering sequence requires
315 // more than one conditional branch, namely 64-bit integer compares 165 // more than one conditional branch, namely 64-bit integer compares
316 // and some floating-point compares. When this is true, and there is 166 // and some floating-point compares. When this is true, and there is
317 // more than one consumer, we prefer to disable the folding 167 // more than one consumer, we prefer to disable the folding
318 // optimization because it minimizes branches. 168 // optimization because it minimizes branches.
319 bool BoolFolding::hasComplexLowering(const Inst *Instr) { 169 template <class MachineTraits>
170 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
320 switch (getProducerKind(Instr)) { 171 switch (getProducerKind(Instr)) {
321 default: 172 default:
322 return false; 173 return false;
323 case PK_Icmp64: 174 case PK_Icmp64:
324 return true; 175 return true;
325 case PK_Fcmp: 176 case PK_Fcmp:
326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != 177 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
327 CondX86::Br_None; 178 .C2 != CondX86::Br_None;
328 } 179 }
329 } 180 }
330 181
331 void BoolFolding::init(CfgNode *Node) { 182 template <class MachineTraits>
183 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
332 Producers.clear(); 184 Producers.clear();
333 for (Inst &Instr : Node->getInsts()) { 185 for (Inst &Instr : Node->getInsts()) {
334 // Check whether Instr is a valid producer. 186 // Check whether Instr is a valid producer.
335 Variable *Var = Instr.getDest(); 187 Variable *Var = Instr.getDest();
336 if (!Instr.isDeleted() // only consider non-deleted instructions 188 if (!Instr.isDeleted() // only consider non-deleted instructions
337 && Var // only instructions with an actual dest var 189 && Var // only instructions with an actual dest var
338 && Var->getType() == IceType_i1 // only bool-type dest vars 190 && Var->getType() == IceType_i1 // only bool-type dest vars
339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions 191 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); 192 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
341 } 193 }
342 // Check each src variable against the map. 194 // Check each src variable against the map.
343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { 195 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
344 Operand *Src = Instr.getSrc(I); 196 Operand *Src = Instr.getSrc(I);
345 SizeT NumVars = Src->getNumVars(); 197 SizeT NumVars = Src->getNumVars();
346 for (SizeT J = 0; J < NumVars; ++J) { 198 for (SizeT J = 0; J < NumVars; ++J) {
347 const Variable *Var = Src->getVar(J); 199 const Variable *Var = Src->getVar(J);
348 SizeT VarNum = Var->getIndex(); 200 SizeT VarNum = Var->getIndex();
349 if (containsValid(VarNum)) { 201 if (containsValid(VarNum)) {
350 if (I != 0 // All valid consumers use Var as the first source operand 202 if (I != 0 // All valid consumers use Var as the first source operand
(...skipping 21 matching lines...) Expand all
372 continue; 224 continue;
373 } 225 }
374 // Mark as "dead" rather than outright deleting. This is so that 226 // Mark as "dead" rather than outright deleting. This is so that
375 // other peephole style optimizations during or before lowering 227 // other peephole style optimizations during or before lowering
376 // have access to this instruction in undeleted form. See for 228 // have access to this instruction in undeleted form. See for
377 // example tryOptimizedCmpxchgCmpBr(). 229 // example tryOptimizedCmpxchgCmpBr().
378 I.second.Instr->setDead(); 230 I.second.Instr->setDead();
379 } 231 }
380 } 232 }
381 233
382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { 234 template <class MachineTraits>
235 const Inst *
236 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
383 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 237 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
384 if (Var == nullptr) 238 if (Var == nullptr)
385 return nullptr; 239 return nullptr;
386 SizeT VarNum = Var->getIndex(); 240 SizeT VarNum = Var->getIndex();
387 auto Element = Producers.find(VarNum); 241 auto Element = Producers.find(VarNum);
388 if (Element == Producers.end()) 242 if (Element == Producers.end())
389 return nullptr; 243 return nullptr;
390 return Element->second.Instr; 244 return Element->second.Instr;
391 } 245 }
392 246
393 void BoolFolding::dump(const Cfg *Func) const { 247 template <class MachineTraits>
248 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) 249 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
395 return; 250 return;
396 OstreamLocker L(Func->getContext()); 251 OstreamLocker L(Func->getContext());
397 Ostream &Str = Func->getContext()->getStrDump(); 252 Ostream &Str = Func->getContext()->getStrDump();
398 for (auto &I : Producers) { 253 for (auto &I : Producers) {
399 if (I.second.Instr == nullptr) 254 if (I.second.Instr == nullptr)
400 continue; 255 continue;
401 Str << "Found foldable producer:\n "; 256 Str << "Found foldable producer:\n ";
402 I.second.Instr->dump(Func); 257 I.second.Instr->dump(Func);
403 Str << "\n"; 258 Str << "\n";
404 } 259 }
405 } 260 }
406 261
407 void TargetX8632::initNodeForLowering(CfgNode *Node) { 262 template <class Machine>
263 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
408 FoldingInfo.init(Node); 264 FoldingInfo.init(Node);
409 FoldingInfo.dump(Func); 265 FoldingInfo.dump(Func);
410 } 266 }
411 267
412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) { 268 template <class Machine>
413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == 269 TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
414 (TargetInstructionSet::X86InstructionSet_End - 270 : Machine(Func) {
415 TargetInstructionSet::X86InstructionSet_Begin), 271 static_assert(
416 "X86InstructionSet range different from TargetInstructionSet"); 272 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
273 (TargetInstructionSet::X86InstructionSet_End -
274 TargetInstructionSet::X86InstructionSet_Begin),
275 "Traits::InstructionSet range different from TargetInstructionSet");
417 if (Func->getContext()->getFlags().getTargetInstructionSet() != 276 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
418 TargetInstructionSet::BaseInstructionSet) { 277 TargetInstructionSet::BaseInstructionSet) {
419 InstructionSet = static_cast<X86InstructionSet>( 278 InstructionSet = static_cast<typename Traits::InstructionSet>(
420 (Func->getContext()->getFlags().getTargetInstructionSet() - 279 (Func->getContext()->getFlags().getTargetInstructionSet() -
421 TargetInstructionSet::X86InstructionSet_Begin) + 280 TargetInstructionSet::X86InstructionSet_Begin) +
422 X86InstructionSet::Begin); 281 Traits::InstructionSet::Begin);
423 } 282 }
424 // TODO: Don't initialize IntegerRegisters and friends every time. 283 // TODO: Don't initialize IntegerRegisters and friends every time.
425 // Instead, initialize in some sort of static initializer for the 284 // Instead, initialize in some sort of static initializer for the
426 // class. 285 // class.
427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); 286 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); 287 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); 288 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); 289 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); 290 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
432 ScratchRegs.resize(RegX8632::Reg_NUM); 291 ScratchRegs.resize(RegX8632::Reg_NUM);
(...skipping 16 matching lines...) Expand all
449 TypeToRegisterSet[IceType_f64] = FloatRegisters; 308 TypeToRegisterSet[IceType_f64] = FloatRegisters;
450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 309 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 310 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; 311 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; 312 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; 313 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; 314 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 315 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
457 } 316 }
458 317
459 void TargetX8632::translateO2() { 318 template <class Machine> void TargetX86Base<Machine>::translateO2() {
460 TimerMarker T(TimerStack::TT_O2, Func); 319 TimerMarker T(TimerStack::TT_O2, Func);
461 320
462 if (!Ctx->getFlags().getPhiEdgeSplit()) { 321 if (!Ctx->getFlags().getPhiEdgeSplit()) {
463 // Lower Phi instructions. 322 // Lower Phi instructions.
464 Func->placePhiLoads(); 323 Func->placePhiLoads();
465 if (Func->hasError()) 324 if (Func->hasError())
466 return; 325 return;
467 Func->placePhiStores(); 326 Func->placePhiStores();
468 if (Func->hasError()) 327 if (Func->hasError())
469 return; 328 return;
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
561 // needed for searching for opportunities. 420 // needed for searching for opportunities.
562 Func->doBranchOpt(); 421 Func->doBranchOpt();
563 Func->dump("After branch optimization"); 422 Func->dump("After branch optimization");
564 423
565 // Nop insertion 424 // Nop insertion
566 if (Ctx->getFlags().shouldDoNopInsertion()) { 425 if (Ctx->getFlags().shouldDoNopInsertion()) {
567 Func->doNopInsertion(); 426 Func->doNopInsertion();
568 } 427 }
569 } 428 }
570 429
571 void TargetX8632::translateOm1() { 430 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
572 TimerMarker T(TimerStack::TT_Om1, Func); 431 TimerMarker T(TimerStack::TT_Om1, Func);
573 432
574 Func->placePhiLoads(); 433 Func->placePhiLoads();
575 if (Func->hasError()) 434 if (Func->hasError())
576 return; 435 return;
577 Func->placePhiStores(); 436 Func->placePhiStores();
578 if (Func->hasError()) 437 if (Func->hasError())
579 return; 438 return;
580 Func->deletePhis(); 439 Func->deletePhis();
581 if (Func->hasError()) 440 if (Func->hasError())
(...skipping 16 matching lines...) Expand all
598 if (Func->hasError()) 457 if (Func->hasError())
599 return; 458 return;
600 Func->dump("After stack frame mapping"); 459 Func->dump("After stack frame mapping");
601 460
602 // Nop insertion 461 // Nop insertion
603 if (Ctx->getFlags().shouldDoNopInsertion()) { 462 if (Ctx->getFlags().shouldDoNopInsertion()) {
604 Func->doNopInsertion(); 463 Func->doNopInsertion();
605 } 464 }
606 } 465 }
607 466
608 namespace {
609
610 bool canRMW(const InstArithmetic *Arith) { 467 bool canRMW(const InstArithmetic *Arith) {
611 Type Ty = Arith->getDest()->getType(); 468 Type Ty = Arith->getDest()->getType();
612 // X86 vector instructions write to a register and have no RMW 469 // X86 vector instructions write to a register and have no RMW
613 // option. 470 // option.
614 if (isVectorType(Ty)) 471 if (isVectorType(Ty))
615 return false; 472 return false;
616 bool isI64 = Ty == IceType_i64; 473 bool isI64 = Ty == IceType_i64;
617 474
618 switch (Arith->getOp()) { 475 switch (Arith->getOp()) {
619 // Not handled for lack of simple lowering: 476 // Not handled for lack of simple lowering:
(...skipping 25 matching lines...) Expand all
645 return MemA->getBase() == MemB->getBase() && 502 return MemA->getBase() == MemB->getBase() &&
646 MemA->getOffset() == MemB->getOffset() && 503 MemA->getOffset() == MemB->getOffset() &&
647 MemA->getIndex() == MemB->getIndex() && 504 MemA->getIndex() == MemB->getIndex() &&
648 MemA->getShift() == MemB->getShift() && 505 MemA->getShift() == MemB->getShift() &&
649 MemA->getSegmentRegister() == MemB->getSegmentRegister(); 506 MemA->getSegmentRegister() == MemB->getSegmentRegister();
650 } 507 }
651 } 508 }
652 return false; 509 return false;
653 } 510 }
654 511
655 } // end of anonymous namespace 512 template <class Machine> void TargetX86Base<Machine>::findRMW() {
656
657 void TargetX8632::findRMW() {
658 Func->dump("Before RMW"); 513 Func->dump("Before RMW");
659 OstreamLocker L(Func->getContext()); 514 OstreamLocker L(Func->getContext());
660 Ostream &Str = Func->getContext()->getStrDump(); 515 Ostream &Str = Func->getContext()->getStrDump();
661 for (CfgNode *Node : Func->getNodes()) { 516 for (CfgNode *Node : Func->getNodes()) {
662 // Walk through the instructions, considering each sequence of 3 517 // Walk through the instructions, considering each sequence of 3
663 // instructions, and look for the particular RMW pattern. Note that this 518 // instructions, and look for the particular RMW pattern. Note that this
664 // search can be "broken" (false negatives) if there are intervening deleted 519 // search can be "broken" (false negatives) if there are intervening deleted
665 // instructions, or intervening instructions that could be safely moved out 520 // instructions, or intervening instructions that could be safely moved out
666 // of the way to reveal an RMW pattern. 521 // of the way to reveal an RMW pattern.
667 auto E = Node->getInsts().end(); 522 auto E = Node->getInsts().end();
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( 594 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); 595 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
741 Node->getInsts().insert(I3, RMW); 596 Node->getInsts().insert(I3, RMW);
742 } 597 }
743 } 598 }
744 } 599 }
745 } 600 }
746 } 601 }
747 } 602 }
748 603
749 namespace {
750
751 // Converts a ConstantInteger32 operand into its constant value, or 604 // Converts a ConstantInteger32 operand into its constant value, or
752 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 605 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
753 uint64_t getConstantMemoryOrder(Operand *Opnd) { 606 uint64_t getConstantMemoryOrder(Operand *Opnd) {
754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 607 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
755 return Integer->getValue(); 608 return Integer->getValue();
756 return Intrinsics::MemoryOrderInvalid; 609 return Intrinsics::MemoryOrderInvalid;
757 } 610 }
758 611
759 // Determines whether the dest of a Load instruction can be folded 612 // Determines whether the dest of a Load instruction can be folded
760 // into one of the src operands of a 2-operand instruction. This is 613 // into one of the src operands of a 2-operand instruction. This is
761 // true as long as the load dest matches exactly one of the binary 614 // true as long as the load dest matches exactly one of the binary
762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if 615 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
763 // the answer is true. 616 // the answer is true.
764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 617 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
765 Operand *&Src0, Operand *&Src1) { 618 Operand *&Src0, Operand *&Src1) {
766 if (Src0 == LoadDest && Src1 != LoadDest) { 619 if (Src0 == LoadDest && Src1 != LoadDest) {
767 Src0 = LoadSrc; 620 Src0 = LoadSrc;
768 return true; 621 return true;
769 } 622 }
770 if (Src0 != LoadDest && Src1 == LoadDest) { 623 if (Src0 != LoadDest && Src1 == LoadDest) {
771 Src1 = LoadSrc; 624 Src1 = LoadSrc;
772 return true; 625 return true;
773 } 626 }
774 return false; 627 return false;
775 } 628 }
776 629
777 } // end of anonymous namespace 630 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
778
779 void TargetX8632::doLoadOpt() {
780 for (CfgNode *Node : Func->getNodes()) { 631 for (CfgNode *Node : Func->getNodes()) {
781 Context.init(Node); 632 Context.init(Node);
782 while (!Context.atEnd()) { 633 while (!Context.atEnd()) {
783 Variable *LoadDest = nullptr; 634 Variable *LoadDest = nullptr;
784 Operand *LoadSrc = nullptr; 635 Operand *LoadSrc = nullptr;
785 Inst *CurInst = Context.getCur(); 636 Inst *CurInst = Context.getCur();
786 Inst *Next = Context.getNextInst(); 637 Inst *Next = Context.getNextInst();
787 // Determine whether the current instruction is a Load 638 // Determine whether the current instruction is a Load
788 // instruction or equivalent. 639 // instruction or equivalent.
789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 640 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
859 NewInst->spliceLivenessInfo(Next, CurInst); 710 NewInst->spliceLivenessInfo(Next, CurInst);
860 } 711 }
861 } 712 }
862 Context.advanceCur(); 713 Context.advanceCur();
863 Context.advanceNext(); 714 Context.advanceNext();
864 } 715 }
865 } 716 }
866 Func->dump("After load optimization"); 717 Func->dump("After load optimization");
867 } 718 }
868 719
869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { 720 template <class Machine>
721 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 722 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
871 return Br->optimizeBranch(NextNode); 723 return Br->optimizeBranch(NextNode);
872 } 724 }
873 return false; 725 return false;
874 } 726 }
875 727
876 IceString TargetX8632::RegNames[] = { 728 template <class Machine>
729 IceString TargetX86Base<Machine>::RegNames[] = {
877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 730 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
878 frameptr, isI8, isInt, isFP) \ 731 frameptr, isI8, isInt, isFP) \
879 name, 732 name,
880 REGX8632_TABLE 733 REGX8632_TABLE
881 #undef X 734 #undef X
882 }; 735 };
883 736
884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) { 737 template <class Machine>
738 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
885 if (Ty == IceType_void) 739 if (Ty == IceType_void)
886 Ty = IceType_i32; 740 Ty = IceType_i32;
887 if (PhysicalRegisters[Ty].empty()) 741 if (PhysicalRegisters[Ty].empty())
888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); 742 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
889 assert(RegNum < PhysicalRegisters[Ty].size()); 743 assert(RegNum < PhysicalRegisters[Ty].size());
890 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 744 Variable *Reg = PhysicalRegisters[Ty][RegNum];
891 if (Reg == nullptr) { 745 if (Reg == nullptr) {
892 Reg = Func->makeVariable(Ty); 746 Reg = Func->makeVariable(Ty);
893 Reg->setRegNum(RegNum); 747 Reg->setRegNum(RegNum);
894 PhysicalRegisters[Ty][RegNum] = Reg; 748 PhysicalRegisters[Ty][RegNum] = Reg;
895 // Specially mark esp as an "argument" so that it is considered 749 // Specially mark esp as an "argument" so that it is considered
896 // live upon function entry. 750 // live upon function entry.
897 if (RegNum == RegX8632::Reg_esp) { 751 if (RegNum == RegX8632::Reg_esp) {
898 Func->addImplicitArg(Reg); 752 Func->addImplicitArg(Reg);
899 Reg->setIgnoreLiveness(); 753 Reg->setIgnoreLiveness();
900 } 754 }
901 } 755 }
902 return Reg; 756 return Reg;
903 } 757 }
904 758
905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { 759 template <class Machine>
760 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
906 assert(RegNum < RegX8632::Reg_NUM); 761 assert(RegNum < RegX8632::Reg_NUM);
907 static IceString RegNames8[] = { 762 static IceString RegNames8[] = {
908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 763 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
909 frameptr, isI8, isInt, isFP) \ 764 frameptr, isI8, isInt, isFP) \
910 name8, 765 name8,
911 REGX8632_TABLE 766 REGX8632_TABLE
912 #undef X 767 #undef X
913 }; 768 };
914 static IceString RegNames16[] = { 769 static IceString RegNames16[] = {
915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 770 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
916 frameptr, isI8, isInt, isFP) \ 771 frameptr, isI8, isInt, isFP) \
917 name16, 772 name16,
918 REGX8632_TABLE 773 REGX8632_TABLE
919 #undef X 774 #undef X
920 }; 775 };
921 switch (Ty) { 776 switch (Ty) {
922 case IceType_i1: 777 case IceType_i1:
923 case IceType_i8: 778 case IceType_i8:
924 return RegNames8[RegNum]; 779 return RegNames8[RegNum];
925 case IceType_i16: 780 case IceType_i16:
926 return RegNames16[RegNum]; 781 return RegNames16[RegNum];
927 default: 782 default:
928 return RegNames[RegNum]; 783 return RegNames[RegNum];
929 } 784 }
930 } 785 }
931 786
932 void TargetX8632::emitVariable(const Variable *Var) const { 787 template <class Machine>
788 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
933 Ostream &Str = Ctx->getStrEmit(); 789 Ostream &Str = Ctx->getStrEmit();
934 if (Var->hasReg()) { 790 if (Var->hasReg()) {
935 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 791 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
936 return; 792 return;
937 } 793 }
938 if (Var->getWeight().isInf()) { 794 if (Var->getWeight().isInf()) {
939 llvm_unreachable("Infinite-weight Variable has no register assigned"); 795 llvm_unreachable("Infinite-weight Variable has no register assigned");
940 } 796 }
941 int32_t Offset = Var->getStackOffset(); 797 int32_t Offset = Var->getStackOffset();
942 if (!hasFramePointer()) 798 if (!hasFramePointer())
943 Offset += getStackAdjustment(); 799 Offset += getStackAdjustment();
944 if (Offset) 800 if (Offset)
945 Str << Offset; 801 Str << Offset;
946 const Type FrameSPTy = IceType_i32; 802 const Type FrameSPTy = IceType_i32;
947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; 803 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
948 } 804 }
949 805
950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const { 806 template <class Machine>
807 X8632::Address
808 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
951 if (Var->hasReg()) 809 if (Var->hasReg())
952 llvm_unreachable("Stack Variable has a register assigned"); 810 llvm_unreachable("Stack Variable has a register assigned");
953 if (Var->getWeight().isInf()) { 811 if (Var->getWeight().isInf()) {
954 llvm_unreachable("Infinite-weight Variable has no register assigned"); 812 llvm_unreachable("Infinite-weight Variable has no register assigned");
955 } 813 }
956 int32_t Offset = Var->getStackOffset(); 814 int32_t Offset = Var->getStackOffset();
957 if (!hasFramePointer()) 815 if (!hasFramePointer())
958 Offset += getStackAdjustment(); 816 Offset += getStackAdjustment();
959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); 817 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
960 } 818 }
961 819
962 void TargetX8632::lowerArguments() { 820 template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
963 VarList &Args = Func->getArgs(); 821 VarList &Args = Func->getArgs();
964 // The first four arguments of vector type, regardless of their 822 // The first four arguments of vector type, regardless of their
965 // position relative to the other arguments in the argument list, are 823 // position relative to the other arguments in the argument list, are
966 // passed in registers xmm0 - xmm3. 824 // passed in registers xmm0 - xmm3.
967 unsigned NumXmmArgs = 0; 825 unsigned NumXmmArgs = 0;
968 826
969 Context.init(Func->getEntryNode()); 827 Context.init(Func->getEntryNode());
970 Context.setInsertPoint(Context.getCur()); 828 Context.setInsertPoint(Context.getCur());
971 829
972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; 830 for (SizeT I = 0, E = Args.size();
973 ++I) { 831 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
974 Variable *Arg = Args[I]; 832 Variable *Arg = Args[I];
975 Type Ty = Arg->getType(); 833 Type Ty = Arg->getType();
976 if (!isVectorType(Ty)) 834 if (!isVectorType(Ty))
977 continue; 835 continue;
978 // Replace Arg in the argument list with the home register. Then 836 // Replace Arg in the argument list with the home register. Then
979 // generate an instruction in the prolog to copy the home register 837 // generate an instruction in the prolog to copy the home register
980 // to the assigned location of Arg. 838 // to the assigned location of Arg.
981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; 839 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
982 ++NumXmmArgs; 840 ++NumXmmArgs;
983 Variable *RegisterArg = Func->makeVariable(Ty); 841 Variable *RegisterArg = Func->makeVariable(Ty);
(...skipping 10 matching lines...) Expand all
994 852
995 // Helper function for addProlog(). 853 // Helper function for addProlog().
996 // 854 //
997 // This assumes Arg is an argument passed on the stack. This sets the 855 // This assumes Arg is an argument passed on the stack. This sets the
998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 856 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
999 // width. For an I64 arg that has been split into Lo and Hi components, 857 // width. For an I64 arg that has been split into Lo and Hi components,
1000 // it calls itself recursively on the components, taking care to handle 858 // it calls itself recursively on the components, taking care to handle
1001 // Lo first because of the little-endian architecture. Lastly, this 859 // Lo first because of the little-endian architecture. Lastly, this
1002 // function generates an instruction to copy Arg into its assigned 860 // function generates an instruction to copy Arg into its assigned
1003 // register if applicable. 861 // register if applicable.
1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 862 template <class Machine>
1005 size_t BasicFrameOffset, 863 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
1006 size_t &InArgsSizeBytes) { 864 Variable *FramePtr,
865 size_t BasicFrameOffset,
866 size_t &InArgsSizeBytes) {
1007 Variable *Lo = Arg->getLo(); 867 Variable *Lo = Arg->getLo();
1008 Variable *Hi = Arg->getHi(); 868 Variable *Hi = Arg->getHi();
1009 Type Ty = Arg->getType(); 869 Type Ty = Arg->getType();
1010 if (Lo && Hi && Ty == IceType_i64) { 870 if (Lo && Hi && Ty == IceType_i64) {
1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 871 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 872 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 873 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 874 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1015 return; 875 return;
1016 } 876 }
1017 if (isVectorType(Ty)) { 877 if (isVectorType(Ty)) {
1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 878 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
1019 } 879 }
1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 880 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 881 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1022 if (Arg->hasReg()) { 882 if (Arg->hasReg()) {
1023 assert(Ty != IceType_i64); 883 assert(Ty != IceType_i64);
1024 OperandX8632Mem *Mem = OperandX8632Mem::create( 884 OperandX8632Mem *Mem = OperandX8632Mem::create(
1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 885 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
1026 if (isVectorType(Arg->getType())) { 886 if (isVectorType(Arg->getType())) {
1027 _movp(Arg, Mem); 887 _movp(Arg, Mem);
1028 } else { 888 } else {
1029 _mov(Arg, Mem); 889 _mov(Arg, Mem);
1030 } 890 }
1031 // This argument-copying instruction uses an explicit 891 // This argument-copying instruction uses an explicit
1032 // OperandX8632Mem operand instead of a Variable, so its 892 // OperandX8632Mem operand instead of a Variable, so its
1033 // fill-from-stack operation has to be tracked separately for 893 // fill-from-stack operation has to be tracked separately for
1034 // statistics. 894 // statistics.
1035 Ctx->statsUpdateFills(); 895 Ctx->statsUpdateFills();
1036 } 896 }
1037 } 897 }
1038 898
1039 Type TargetX8632::stackSlotType() { return IceType_i32; } 899 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
900 return IceType_i32;
901 }
1040 902
1041 void TargetX8632::addProlog(CfgNode *Node) { 903 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
1042 // Stack frame layout: 904 // Stack frame layout:
1043 // 905 //
1044 // +------------------------+ 906 // +------------------------+
1045 // | 1. return address | 907 // | 1. return address |
1046 // +------------------------+ 908 // +------------------------+
1047 // | 2. preserved registers | 909 // | 2. preserved registers |
1048 // +------------------------+ 910 // +------------------------+
1049 // | 3. padding | 911 // | 3. padding |
1050 // +------------------------+ 912 // +------------------------+
1051 // | 4. global spill area | 913 // | 4. global spill area |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
1140 _mov(ebp, esp); 1002 _mov(ebp, esp);
1141 // Keep ebp live for late-stage liveness analysis 1003 // Keep ebp live for late-stage liveness analysis
1142 // (e.g. asm-verbose mode). 1004 // (e.g. asm-verbose mode).
1143 Context.insert(InstFakeUse::create(Func, ebp)); 1005 Context.insert(InstFakeUse::create(Func, ebp));
1144 } 1006 }
1145 1007
1146 // Align the variables area. SpillAreaPaddingBytes is the size of 1008 // Align the variables area. SpillAreaPaddingBytes is the size of
1147 // the region after the preserved registers and before the spill areas. 1009 // the region after the preserved registers and before the spill areas.
1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals 1010 // LocalsSlotsPaddingBytes is the amount of padding between the globals
1149 // and locals area if they are separate. 1011 // and locals area if they are separate.
1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); 1012 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 1013 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1152 uint32_t SpillAreaPaddingBytes = 0; 1014 uint32_t SpillAreaPaddingBytes = 0;
1153 uint32_t LocalsSlotsPaddingBytes = 0; 1015 uint32_t LocalsSlotsPaddingBytes = 0;
1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, 1016 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
1155 SpillAreaAlignmentBytes, GlobalsSize, 1017 SpillAreaAlignmentBytes, GlobalsSize,
1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, 1018 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
1157 &LocalsSlotsPaddingBytes); 1019 &LocalsSlotsPaddingBytes);
1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 1020 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1159 uint32_t GlobalsAndSubsequentPaddingSize = 1021 uint32_t GlobalsAndSubsequentPaddingSize =
1160 GlobalsSize + LocalsSlotsPaddingBytes; 1022 GlobalsSize + LocalsSlotsPaddingBytes;
1161 1023
1162 // Align esp if necessary. 1024 // Align esp if necessary.
1163 if (NeedsStackAlignment) { 1025 if (NeedsStackAlignment) {
1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 1026 uint32_t StackOffset =
1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1027 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
1028 uint32_t StackSize =
1029 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1166 SpillAreaSizeBytes = StackSize - StackOffset; 1030 SpillAreaSizeBytes = StackSize - StackOffset;
1167 } 1031 }
1168 1032
1169 // Generate "sub esp, SpillAreaSizeBytes" 1033 // Generate "sub esp, SpillAreaSizeBytes"
1170 if (SpillAreaSizeBytes) 1034 if (SpillAreaSizeBytes)
1171 _sub(getPhysicalRegister(RegX8632::Reg_esp), 1035 _sub(getPhysicalRegister(RegX8632::Reg_esp),
1172 Ctx->getConstantInt32(SpillAreaSizeBytes)); 1036 Ctx->getConstantInt32(SpillAreaSizeBytes));
1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 1037 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1174 1038
1175 resetStackAdjustment(); 1039 resetStackAdjustment();
1176 1040
1177 // Fill in stack offsets for stack args, and copy args into registers 1041 // Fill in stack offsets for stack args, and copy args into registers
1178 // for those that were register-allocated. Args are pushed right to 1042 // for those that were register-allocated. Args are pushed right to
1179 // left, so Arg[0] is closest to the stack/frame pointer. 1043 // left, so Arg[0] is closest to the stack/frame pointer.
1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 1044 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 1045 size_t BasicFrameOffset =
1046 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
1182 if (!IsEbpBasedFrame) 1047 if (!IsEbpBasedFrame)
1183 BasicFrameOffset += SpillAreaSizeBytes; 1048 BasicFrameOffset += SpillAreaSizeBytes;
1184 1049
1185 const VarList &Args = Func->getArgs(); 1050 const VarList &Args = Func->getArgs();
1186 size_t InArgsSizeBytes = 0; 1051 size_t InArgsSizeBytes = 0;
1187 unsigned NumXmmArgs = 0; 1052 unsigned NumXmmArgs = 0;
1188 for (Variable *Arg : Args) { 1053 for (Variable *Arg : Args) {
1189 // Skip arguments passed in registers. 1054 // Skip arguments passed in registers.
1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 1055 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
1191 ++NumXmmArgs; 1056 ++NumXmmArgs;
1192 continue; 1057 continue;
1193 } 1058 }
1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 1059 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1195 } 1060 }
1196 1061
1197 // Fill in stack offsets for locals. 1062 // Fill in stack offsets for locals.
1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1063 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 1064 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1200 IsEbpBasedFrame); 1065 IsEbpBasedFrame);
1201 // Assign stack offsets to variables that have been linked to spilled 1066 // Assign stack offsets to variables that have been linked to spilled
1202 // variables. 1067 // variables.
1203 for (Variable *Var : VariablesLinkedToSpillSlots) { 1068 for (Variable *Var : VariablesLinkedToSpillSlots) {
1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); 1069 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
1205 Var->setStackOffset(Linked->getStackOffset()); 1070 Var->setStackOffset(Linked->getStackOffset());
1206 } 1071 }
1207 this->HasComputedFrame = true; 1072 this->HasComputedFrame = true;
1208 1073
1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { 1074 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
1210 OstreamLocker L(Func->getContext()); 1075 OstreamLocker L(Func->getContext());
1211 Ostream &Str = Func->getContext()->getStrDump(); 1076 Ostream &Str = Func->getContext()->getStrDump();
1212 1077
1213 Str << "Stack layout:\n"; 1078 Str << "Stack layout:\n";
1214 uint32_t EspAdjustmentPaddingSize = 1079 uint32_t EspAdjustmentPaddingSize =
1215 SpillAreaSizeBytes - LocalsSpillAreaSize - 1080 SpillAreaSizeBytes - LocalsSpillAreaSize -
1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; 1081 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 1082 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" 1083 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 1084 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 1085 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1221 << " globals spill area = " << GlobalsSize << " bytes\n" 1086 << " globals spill area = " << GlobalsSize << " bytes\n"
1222 << " globals-locals spill areas intermediate padding = " 1087 << " globals-locals spill areas intermediate padding = "
1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 1088 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 1089 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1225 << " esp alignment padding = " << EspAdjustmentPaddingSize 1090 << " esp alignment padding = " << EspAdjustmentPaddingSize
1226 << " bytes\n"; 1091 << " bytes\n";
1227 1092
1228 Str << "Stack details:\n" 1093 Str << "Stack details:\n"
1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" 1094 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 1095 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1096 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1232 << " bytes\n" 1097 << " bytes\n"
1233 << " is ebp based = " << IsEbpBasedFrame << "\n"; 1098 << " is ebp based = " << IsEbpBasedFrame << "\n";
1234 } 1099 }
1235 } 1100 }
1236 1101
1237 void TargetX8632::addEpilog(CfgNode *Node) { 1102 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
1238 InstList &Insts = Node->getInsts(); 1103 InstList &Insts = Node->getInsts();
1239 InstList::reverse_iterator RI, E; 1104 InstList::reverse_iterator RI, E;
1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1105 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1241 if (llvm::isa<InstX8632Ret>(*RI)) 1106 if (llvm::isa<InstX8632Ret>(*RI))
1242 break; 1107 break;
1243 } 1108 }
1244 if (RI == E) 1109 if (RI == E)
1245 return; 1110 return;
1246 1111
1247 // Convert the reverse_iterator position into its corresponding 1112 // Convert the reverse_iterator position into its corresponding
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1280 1145
1281 if (!Ctx->getFlags().getUseSandboxing()) 1146 if (!Ctx->getFlags().getUseSandboxing())
1282 return; 1147 return;
1283 // Change the original ret instruction into a sandboxed return sequence. 1148 // Change the original ret instruction into a sandboxed return sequence.
1284 // t:ecx = pop 1149 // t:ecx = pop
1285 // bundle_lock 1150 // bundle_lock
1286 // and t, ~31 1151 // and t, ~31
1287 // jmp *t 1152 // jmp *t
1288 // bundle_unlock 1153 // bundle_unlock
1289 // FakeUse <original_ret_operand> 1154 // FakeUse <original_ret_operand>
1290 const SizeT BundleSize = 1 1155 const SizeT BundleSize =
1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 1156 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 1157 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
1293 _pop(T_ecx); 1158 _pop(T_ecx);
1294 _bundle_lock(); 1159 _bundle_lock();
1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); 1160 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
1296 _jmp(T_ecx); 1161 _jmp(T_ecx);
1297 _bundle_unlock(); 1162 _bundle_unlock();
1298 if (RI->getSrcSize()) { 1163 if (RI->getSrcSize()) {
1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1164 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
1300 Context.insert(InstFakeUse::create(Func, RetValue)); 1165 Context.insert(InstFakeUse::create(Func, RetValue));
1301 } 1166 }
1302 RI->setDeleted(); 1167 RI->setDeleted();
1303 } 1168 }
1304 1169
1305 void TargetX8632::split64(Variable *Var) { 1170 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
1306 switch (Var->getType()) { 1171 switch (Var->getType()) {
1307 default: 1172 default:
1308 return; 1173 return;
1309 case IceType_i64: 1174 case IceType_i64:
1310 // TODO: Only consider F64 if we need to push each half when 1175 // TODO: Only consider F64 if we need to push each half when
1311 // passing as an argument to a function call. Note that each half 1176 // passing as an argument to a function call. Note that each half
1312 // is still typed as I32. 1177 // is still typed as I32.
1313 case IceType_f64: 1178 case IceType_f64:
1314 break; 1179 break;
1315 } 1180 }
(...skipping 10 matching lines...) Expand all
1326 Lo->setName(Func, Var->getName(Func) + "__lo"); 1191 Lo->setName(Func, Var->getName(Func) + "__lo");
1327 Hi->setName(Func, Var->getName(Func) + "__hi"); 1192 Hi->setName(Func, Var->getName(Func) + "__hi");
1328 } 1193 }
1329 Var->setLoHi(Lo, Hi); 1194 Var->setLoHi(Lo, Hi);
1330 if (Var->getIsArg()) { 1195 if (Var->getIsArg()) {
1331 Lo->setIsArg(); 1196 Lo->setIsArg();
1332 Hi->setIsArg(); 1197 Hi->setIsArg();
1333 } 1198 }
1334 } 1199 }
1335 1200
1336 Operand *TargetX8632::loOperand(Operand *Operand) { 1201 template <class Machine>
1202 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
1337 assert(Operand->getType() == IceType_i64 || 1203 assert(Operand->getType() == IceType_i64 ||
1338 Operand->getType() == IceType_f64); 1204 Operand->getType() == IceType_f64);
1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1205 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1340 return Operand; 1206 return Operand;
1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1207 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1342 split64(Var); 1208 split64(Var);
1343 return Var->getLo(); 1209 return Var->getLo();
1344 } 1210 }
1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1211 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1212 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 1213 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
1348 return legalize(ConstInt); 1214 return legalize(ConstInt);
1349 } 1215 }
1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1216 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1217 OperandX8632Mem *MemOperand = OperandX8632Mem::create(
1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 1218 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
1353 Mem->getShift(), Mem->getSegmentRegister()); 1219 Mem->getShift(), Mem->getSegmentRegister());
1354 // Test if we should randomize or pool the offset, if so randomize it or 1220 // Test if we should randomize or pool the offset, if so randomize it or
1355 // pool it then create mem operand with the blinded/pooled constant. 1221 // pool it then create mem operand with the blinded/pooled constant.
1356 // Otherwise, return the mem operand as ordinary mem operand. 1222 // Otherwise, return the mem operand as ordinary mem operand.
1357 return legalize(MemOperand); 1223 return legalize(MemOperand);
1358 } 1224 }
1359 llvm_unreachable("Unsupported operand type"); 1225 llvm_unreachable("Unsupported operand type");
1360 return nullptr; 1226 return nullptr;
1361 } 1227 }
1362 1228
1363 Operand *TargetX8632::hiOperand(Operand *Operand) { 1229 template <class Machine>
1230 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
1364 assert(Operand->getType() == IceType_i64 || 1231 assert(Operand->getType() == IceType_i64 ||
1365 Operand->getType() == IceType_f64); 1232 Operand->getType() == IceType_f64);
1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1233 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1367 return Operand; 1234 return Operand;
1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1235 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1369 split64(Var); 1236 split64(Var);
1370 return Var->getHi(); 1237 return Var->getHi();
1371 } 1238 }
1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1239 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1240 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 20 matching lines...) Expand all
1394 Mem->getShift(), Mem->getSegmentRegister()); 1261 Mem->getShift(), Mem->getSegmentRegister());
1395 // Test if the Offset is an eligible i32 constants for randomization and 1262 // Test if the Offset is an eligible i32 constants for randomization and
1396 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem 1263 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1397 // operand. 1264 // operand.
1398 return legalize(MemOperand); 1265 return legalize(MemOperand);
1399 } 1266 }
1400 llvm_unreachable("Unsupported operand type"); 1267 llvm_unreachable("Unsupported operand type");
1401 return nullptr; 1268 return nullptr;
1402 } 1269 }
1403 1270
1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1271 template <class Machine>
1405 RegSetMask Exclude) const { 1272 llvm::SmallBitVector
1273 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
1274 RegSetMask Exclude) const {
1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); 1275 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
1407 1276
1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 1277 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1409 frameptr, isI8, isInt, isFP) \ 1278 frameptr, isI8, isInt, isFP) \
1410 if (scratch && (Include & RegSet_CallerSave)) \ 1279 if (scratch && (Include & RegSet_CallerSave)) \
1411 Registers[RegX8632::val] = true; \ 1280 Registers[RegX8632::val] = true; \
1412 if (preserved && (Include & RegSet_CalleeSave)) \ 1281 if (preserved && (Include & RegSet_CalleeSave)) \
1413 Registers[RegX8632::val] = true; \ 1282 Registers[RegX8632::val] = true; \
1414 if (stackptr && (Include & RegSet_StackPointer)) \ 1283 if (stackptr && (Include & RegSet_StackPointer)) \
1415 Registers[RegX8632::val] = true; \ 1284 Registers[RegX8632::val] = true; \
1416 if (frameptr && (Include & RegSet_FramePointer)) \ 1285 if (frameptr && (Include & RegSet_FramePointer)) \
1417 Registers[RegX8632::val] = true; \ 1286 Registers[RegX8632::val] = true; \
1418 if (scratch && (Exclude & RegSet_CallerSave)) \ 1287 if (scratch && (Exclude & RegSet_CallerSave)) \
1419 Registers[RegX8632::val] = false; \ 1288 Registers[RegX8632::val] = false; \
1420 if (preserved && (Exclude & RegSet_CalleeSave)) \ 1289 if (preserved && (Exclude & RegSet_CalleeSave)) \
1421 Registers[RegX8632::val] = false; \ 1290 Registers[RegX8632::val] = false; \
1422 if (stackptr && (Exclude & RegSet_StackPointer)) \ 1291 if (stackptr && (Exclude & RegSet_StackPointer)) \
1423 Registers[RegX8632::val] = false; \ 1292 Registers[RegX8632::val] = false; \
1424 if (frameptr && (Exclude & RegSet_FramePointer)) \ 1293 if (frameptr && (Exclude & RegSet_FramePointer)) \
1425 Registers[RegX8632::val] = false; 1294 Registers[RegX8632::val] = false;
1426 1295
1427 REGX8632_TABLE 1296 REGX8632_TABLE
1428 1297
1429 #undef X 1298 #undef X
1430 1299
1431 return Registers; 1300 return Registers;
1432 } 1301 }
1433 1302
1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { 1303 template <class Machine>
1304 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
1435 IsEbpBasedFrame = true; 1305 IsEbpBasedFrame = true;
1436 // Conservatively require the stack to be aligned. Some stack 1306 // Conservatively require the stack to be aligned. Some stack
1437 // adjustment operations implemented below assume that the stack is 1307 // adjustment operations implemented below assume that the stack is
1438 // aligned before the alloca. All the alloca code ensures that the 1308 // aligned before the alloca. All the alloca code ensures that the
1439 // stack alignment is preserved after the alloca. The stack alignment 1309 // stack alignment is preserved after the alloca. The stack alignment
1440 // restriction can be relaxed in some cases. 1310 // restriction can be relaxed in some cases.
1441 NeedsStackAlignment = true; 1311 NeedsStackAlignment = true;
1442 1312
1443 // TODO(stichnot): minimize the number of adjustments of esp, etc. 1313 // TODO(stichnot): minimize the number of adjustments of esp, etc.
1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 1314 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
1445 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1315 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1446 Variable *Dest = Inst->getDest(); 1316 Variable *Dest = Inst->getDest();
1447 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1317 uint32_t AlignmentParam = Inst->getAlignInBytes();
1448 // For default align=0, set it to the real value 1, to avoid any 1318 // For default align=0, set it to the real value 1, to avoid any
1449 // bit-manipulation problems below. 1319 // bit-manipulation problems below.
1450 AlignmentParam = std::max(AlignmentParam, 1u); 1320 AlignmentParam = std::max(AlignmentParam, 1u);
1451 1321
1452 // LLVM enforces power of 2 alignment. 1322 // LLVM enforces power of 2 alignment.
1453 assert(llvm::isPowerOf2_32(AlignmentParam)); 1323 assert(llvm::isPowerOf2_32(AlignmentParam));
1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES)); 1324 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
1455 1325
1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1326 uint32_t Alignment =
1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1327 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
1328 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
1458 _and(esp, Ctx->getConstantInt32(-Alignment)); 1329 _and(esp, Ctx->getConstantInt32(-Alignment));
1459 } 1330 }
1460 if (const auto *ConstantTotalSize = 1331 if (const auto *ConstantTotalSize =
1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1332 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1462 uint32_t Value = ConstantTotalSize->getValue(); 1333 uint32_t Value = ConstantTotalSize->getValue();
1463 Value = Utils::applyAlignment(Value, Alignment); 1334 Value = Utils::applyAlignment(Value, Alignment);
1464 _sub(esp, Ctx->getConstantInt32(Value)); 1335 _sub(esp, Ctx->getConstantInt32(Value));
1465 } else { 1336 } else {
1466 // Non-constant sizes need to be adjusted to the next highest 1337 // Non-constant sizes need to be adjusted to the next highest
1467 // multiple of the required alignment at runtime. 1338 // multiple of the required alignment at runtime.
1468 Variable *T = makeReg(IceType_i32); 1339 Variable *T = makeReg(IceType_i32);
1469 _mov(T, TotalSize); 1340 _mov(T, TotalSize);
1470 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1341 _add(T, Ctx->getConstantInt32(Alignment - 1));
1471 _and(T, Ctx->getConstantInt32(-Alignment)); 1342 _and(T, Ctx->getConstantInt32(-Alignment));
1472 _sub(esp, T); 1343 _sub(esp, T);
1473 } 1344 }
1474 _mov(Dest, esp); 1345 _mov(Dest, esp);
1475 } 1346 }
1476 1347
1477 // Strength-reduce scalar integer multiplication by a constant (for 1348 // Strength-reduce scalar integer multiplication by a constant (for
1478 // i32 or narrower) for certain constants. The lea instruction can be 1349 // i32 or narrower) for certain constants. The lea instruction can be
1479 // used to multiply by 3, 5, or 9, and the lsh instruction can be used 1350 // used to multiply by 3, 5, or 9, and the lsh instruction can be used
1480 // to multiply by powers of 2. These can be combined such that 1351 // to multiply by powers of 2. These can be combined such that
1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, 1352 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1482 // combined with left-shifting by 2. 1353 // combined with left-shifting by 2.
1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, 1354 template <class Machine>
1484 int32_t Src1) { 1355 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1356 int32_t Src1) {
1485 // Disable this optimization for Om1 and O0, just to keep things 1357 // Disable this optimization for Om1 and O0, just to keep things
1486 // simple there. 1358 // simple there.
1487 if (Ctx->getFlags().getOptLevel() < Opt_1) 1359 if (Ctx->getFlags().getOptLevel() < Opt_1)
1488 return false; 1360 return false;
1489 Type Ty = Dest->getType(); 1361 Type Ty = Dest->getType();
1490 Variable *T = nullptr; 1362 Variable *T = nullptr;
1491 if (Src1 == -1) { 1363 if (Src1 == -1) {
1492 _mov(T, Src0); 1364 _mov(T, Src0);
1493 _neg(T); 1365 _neg(T);
1494 _mov(Dest, T); 1366 _mov(Dest, T);
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
1563 } 1435 }
1564 if (Count2) { 1436 if (Count2) {
1565 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1437 _shl(T, Ctx->getConstantInt(Ty, Count2));
1566 } 1438 }
1567 if (Src1IsNegative) 1439 if (Src1IsNegative)
1568 _neg(T); 1440 _neg(T);
1569 _mov(Dest, T); 1441 _mov(Dest, T);
1570 return true; 1442 return true;
1571 } 1443 }
1572 1444
1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1445 template <class Machine>
1446 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1574 Variable *Dest = Inst->getDest(); 1447 Variable *Dest = Inst->getDest();
1575 Operand *Src0 = legalize(Inst->getSrc(0)); 1448 Operand *Src0 = legalize(Inst->getSrc(0));
1576 Operand *Src1 = legalize(Inst->getSrc(1)); 1449 Operand *Src1 = legalize(Inst->getSrc(1));
1577 if (Inst->isCommutative()) { 1450 if (Inst->isCommutative()) {
1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1451 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1579 std::swap(Src0, Src1); 1452 std::swap(Src0, Src1);
1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1453 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1581 std::swap(Src0, Src1); 1454 std::swap(Src0, Src1);
1582 } 1455 }
1583 if (Dest->getType() == IceType_i64) { 1456 if (Dest->getType() == IceType_i64) {
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after
1866 case InstArithmetic::Sub: { 1739 case InstArithmetic::Sub: {
1867 Variable *T = makeReg(Dest->getType()); 1740 Variable *T = makeReg(Dest->getType());
1868 _movp(T, Src0); 1741 _movp(T, Src0);
1869 _psub(T, Src1); 1742 _psub(T, Src1);
1870 _movp(Dest, T); 1743 _movp(Dest, T);
1871 } break; 1744 } break;
1872 case InstArithmetic::Mul: { 1745 case InstArithmetic::Mul: {
1873 bool TypesAreValidForPmull = 1746 bool TypesAreValidForPmull =
1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1747 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1875 bool InstructionSetIsValidForPmull = 1748 bool InstructionSetIsValidForPmull =
1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1749 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1;
1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1750 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1878 Variable *T = makeReg(Dest->getType()); 1751 Variable *T = makeReg(Dest->getType());
1879 _movp(T, Src0); 1752 _movp(T, Src0);
1880 _pmull(T, Src1); 1753 _pmull(T, Src1);
1881 _movp(Dest, T); 1754 _movp(Dest, T);
1882 } else if (Dest->getType() == IceType_v4i32) { 1755 } else if (Dest->getType() == IceType_v4i32) {
1883 // Lowering sequence: 1756 // Lowering sequence:
1884 // Note: The mask arguments have index 0 on the left. 1757 // Note: The mask arguments have index 0 on the left.
1885 // 1758 //
1886 // movups T1, Src0 1759 // movups T1, Src0
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1933 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
2061 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1934 uint32_t LogDiv = llvm::Log2_32(UDivisor);
2062 Type Ty = Dest->getType(); 1935 Type Ty = Dest->getType();
2063 // LLVM does the following for dest=src/(1<<log): 1936 // LLVM does the following for dest=src/(1<<log):
2064 // t=src 1937 // t=src
2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1938 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2066 // shr t,typewidth-log 1939 // shr t,typewidth-log
2067 // add t,src 1940 // add t,src
2068 // sar t,log 1941 // sar t,log
2069 // dest=t 1942 // dest=t
2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); 1943 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2071 _mov(T, Src0); 1944 _mov(T, Src0);
2072 // If for some reason we are dividing by 1, just treat it 1945 // If for some reason we are dividing by 1, just treat it
2073 // like an assignment. 1946 // like an assignment.
2074 if (LogDiv > 0) { 1947 if (LogDiv > 0) {
2075 // The initial sar is unnecessary when dividing by 2. 1948 // The initial sar is unnecessary when dividing by 2.
2076 if (LogDiv > 1) 1949 if (LogDiv > 1)
2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 1950 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1951 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
2079 _add(T, Src0); 1952 _add(T, Src0);
2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1953 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
2129 Type Ty = Dest->getType(); 2002 Type Ty = Dest->getType();
2130 // LLVM does the following for dest=src%(1<<log): 2003 // LLVM does the following for dest=src%(1<<log):
2131 // t=src 2004 // t=src
2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not 2005 // sar t,typewidth-1 // -1 if src is negative, 0 if not
2133 // shr t,typewidth-log 2006 // shr t,typewidth-log
2134 // add t,src 2007 // add t,src
2135 // and t, -(1<<log) 2008 // and t, -(1<<log)
2136 // sub t,src 2009 // sub t,src
2137 // neg t 2010 // neg t
2138 // dest=t 2011 // dest=t
2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); 2012 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
2140 // If for some reason we are dividing by 1, just assign 0. 2013 // If for some reason we are dividing by 1, just assign 0.
2141 if (LogDiv == 0) { 2014 if (LogDiv == 0) {
2142 _mov(Dest, Ctx->getConstantZero(Ty)); 2015 _mov(Dest, Ctx->getConstantZero(Ty));
2143 return; 2016 return;
2144 } 2017 }
2145 _mov(T, Src0); 2018 _mov(T, Src0);
2146 // The initial sar is unnecessary when dividing by 2. 2019 // The initial sar is unnecessary when dividing by 2.
2147 if (LogDiv > 1) 2020 if (LogDiv > 1)
2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 2021 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 2022 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
2197 Type Ty = Dest->getType(); 2070 Type Ty = Dest->getType();
2198 InstCall *Call = makeHelperCall( 2071 InstCall *Call = makeHelperCall(
2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2072 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
2200 Call->addArg(Src0); 2073 Call->addArg(Src0);
2201 Call->addArg(Src1); 2074 Call->addArg(Src1);
2202 return lowerCall(Call); 2075 return lowerCall(Call);
2203 } 2076 }
2204 } 2077 }
2205 } 2078 }
2206 2079
2207 void TargetX8632::lowerAssign(const InstAssign *Inst) { 2080 template <class Machine>
2081 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
2208 Variable *Dest = Inst->getDest(); 2082 Variable *Dest = Inst->getDest();
2209 Operand *Src0 = Inst->getSrc(0); 2083 Operand *Src0 = Inst->getSrc(0);
2210 assert(Dest->getType() == Src0->getType()); 2084 assert(Dest->getType() == Src0->getType());
2211 if (Dest->getType() == IceType_i64) { 2085 if (Dest->getType() == IceType_i64) {
2212 Src0 = legalize(Src0); 2086 Src0 = legalize(Src0);
2213 Operand *Src0Lo = loOperand(Src0); 2087 Operand *Src0Lo = loOperand(Src0);
2214 Operand *Src0Hi = hiOperand(Src0); 2088 Operand *Src0Hi = hiOperand(Src0);
2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2089 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2090 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2217 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2091 Variable *T_Lo = nullptr, *T_Hi = nullptr;
(...skipping 24 matching lines...) Expand all
2242 // register or a scalar integer immediate. 2116 // register or a scalar integer immediate.
2243 RI = legalize(Src0, Legal_Reg | Legal_Imm); 2117 RI = legalize(Src0, Legal_Reg | Legal_Imm);
2244 } 2118 }
2245 if (isVectorType(Dest->getType())) 2119 if (isVectorType(Dest->getType()))
2246 _movp(Dest, RI); 2120 _movp(Dest, RI);
2247 else 2121 else
2248 _mov(Dest, RI); 2122 _mov(Dest, RI);
2249 } 2123 }
2250 } 2124 }
2251 2125
2252 void TargetX8632::lowerBr(const InstBr *Inst) { 2126 template <class Machine>
2127 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
2253 if (Inst->isUnconditional()) { 2128 if (Inst->isUnconditional()) {
2254 _br(Inst->getTargetUnconditional()); 2129 _br(Inst->getTargetUnconditional());
2255 return; 2130 return;
2256 } 2131 }
2257 Operand *Cond = Inst->getCondition(); 2132 Operand *Cond = Inst->getCondition();
2258 2133
2259 // Handle folding opportunities. 2134 // Handle folding opportunities.
2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 2135 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
2261 assert(Producer->isDeleted()); 2136 assert(Producer->isDeleted());
2262 switch (BoolFolding::getProducerKind(Producer)) { 2137 switch (BoolFolding::getProducerKind(Producer)) {
2263 default: 2138 default:
2264 break; 2139 break;
2265 case BoolFolding::PK_Icmp32: { 2140 case BoolFolding::PK_Icmp32: {
2266 // TODO(stichnot): Refactor similarities between this block and 2141 // TODO(stichnot): Refactor similarities between this block and
2267 // the corresponding code in lowerIcmp(). 2142 // the corresponding code in lowerIcmp().
2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 2143 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
2269 Operand *Src0 = Producer->getSrc(0); 2144 Operand *Src0 = Producer->getSrc(0);
2270 Operand *Src1 = legalize(Producer->getSrc(1)); 2145 Operand *Src1 = legalize(Producer->getSrc(1));
2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2146 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2272 _cmp(Src0RM, Src1); 2147 _cmp(Src0RM, Src1);
2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), 2148 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
2274 Inst->getTargetFalse()); 2149 Inst->getTargetFalse());
2275 return; 2150 return;
2276 } 2151 }
2277 } 2152 }
2278 } 2153 }
2279 2154
2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2155 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2281 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2156 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2282 _cmp(Src0, Zero); 2157 _cmp(Src0, Zero);
2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 2158 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
2284 } 2159 }
2285 2160
2286 void TargetX8632::lowerCall(const InstCall *Instr) { 2161 template <class Machine>
2162 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
2287 // x86-32 calling convention: 2163 // x86-32 calling convention:
2288 // 2164 //
2289 // * At the point before the call, the stack must be aligned to 16 2165 // * At the point before the call, the stack must be aligned to 16
2290 // bytes. 2166 // bytes.
2291 // 2167 //
2292 // * The first four arguments of vector type, regardless of their 2168 // * The first four arguments of vector type, regardless of their
2293 // position relative to the other arguments in the argument list, are 2169 // position relative to the other arguments in the argument list, are
2294 // placed in registers xmm0 - xmm3. 2170 // placed in registers xmm0 - xmm3.
2295 // 2171 //
2296 // * Other arguments are pushed onto the stack in right-to-left order, 2172 // * Other arguments are pushed onto the stack in right-to-left order,
(...skipping 14 matching lines...) Expand all
2311 OperandList StackArgs, StackArgLocations; 2187 OperandList StackArgs, StackArgLocations;
2312 uint32_t ParameterAreaSizeBytes = 0; 2188 uint32_t ParameterAreaSizeBytes = 0;
2313 2189
2314 // Classify each argument operand according to the location where the 2190 // Classify each argument operand according to the location where the
2315 // argument is passed. 2191 // argument is passed.
2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { 2192 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
2317 Operand *Arg = Instr->getArg(i); 2193 Operand *Arg = Instr->getArg(i);
2318 Type Ty = Arg->getType(); 2194 Type Ty = Arg->getType();
2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 2195 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2320 assert(typeWidthInBytes(Ty) >= 4); 2196 assert(typeWidthInBytes(Ty) >= 4);
2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 2197 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
2322 XmmArgs.push_back(Arg); 2198 XmmArgs.push_back(Arg);
2323 } else { 2199 } else {
2324 StackArgs.push_back(Arg); 2200 StackArgs.push_back(Arg);
2325 if (isVectorType(Arg->getType())) { 2201 if (isVectorType(Arg->getType())) {
2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 2202 ParameterAreaSizeBytes =
2203 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2327 } 2204 }
2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 2205 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 2206 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 2207 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 2208 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2332 } 2209 }
2333 } 2210 }
2334 2211
2335 // Adjust the parameter area so that the stack is aligned. It is 2212 // Adjust the parameter area so that the stack is aligned. It is
2336 // assumed that the stack is already aligned at the start of the 2213 // assumed that the stack is already aligned at the start of the
2337 // calling sequence. 2214 // calling sequence.
2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 2215 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2339 2216
2340 // Subtract the appropriate amount for the argument area. This also 2217 // Subtract the appropriate amount for the argument area. This also
2341 // takes care of setting the stack adjustment during emission. 2218 // takes care of setting the stack adjustment during emission.
2342 // 2219 //
2343 // TODO: If for some reason the call instruction gets dead-code 2220 // TODO: If for some reason the call instruction gets dead-code
2344 // eliminated after lowering, we would need to ensure that the 2221 // eliminated after lowering, we would need to ensure that the
2345 // pre-call and the post-call esp adjustment get eliminated as well. 2222 // pre-call and the post-call esp adjustment get eliminated as well.
2346 if (ParameterAreaSizeBytes) { 2223 if (ParameterAreaSizeBytes) {
2347 _adjust_stack(ParameterAreaSizeBytes); 2224 _adjust_stack(ParameterAreaSizeBytes);
2348 } 2225 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
2411 Operand *CallTarget = legalize(Instr->getCallTarget()); 2288 Operand *CallTarget = legalize(Instr->getCallTarget());
2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 2289 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
2413 if (NeedSandboxing) { 2290 if (NeedSandboxing) {
2414 if (llvm::isa<Constant>(CallTarget)) { 2291 if (llvm::isa<Constant>(CallTarget)) {
2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2292 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2416 } else { 2293 } else {
2417 Variable *CallTargetVar = nullptr; 2294 Variable *CallTargetVar = nullptr;
2418 _mov(CallTargetVar, CallTarget); 2295 _mov(CallTargetVar, CallTarget);
2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2296 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2420 const SizeT BundleSize = 2297 const SizeT BundleSize =
2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 2298 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); 2299 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2423 CallTarget = CallTargetVar; 2300 CallTarget = CallTargetVar;
2424 } 2301 }
2425 } 2302 }
2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 2303 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
2427 Context.insert(NewCall); 2304 Context.insert(NewCall);
2428 if (NeedSandboxing) 2305 if (NeedSandboxing)
2429 _bundle_unlock(); 2306 _bundle_unlock();
2430 if (ReturnRegHi) 2307 if (ReturnRegHi)
2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 2308 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 // st(0). 2350 // st(0).
2474 // If Dest ends up being a physical xmm register, the fstp emit code 2351 // If Dest ends up being a physical xmm register, the fstp emit code
2475 // will route st(0) through a temporary stack slot. 2352 // will route st(0) through a temporary stack slot.
2476 _fstp(Dest); 2353 _fstp(Dest);
2477 // Create a fake use of Dest in case it actually isn't used, 2354 // Create a fake use of Dest in case it actually isn't used,
2478 // because st(0) still needs to be popped. 2355 // because st(0) still needs to be popped.
2479 Context.insert(InstFakeUse::create(Func, Dest)); 2356 Context.insert(InstFakeUse::create(Func, Dest));
2480 } 2357 }
2481 } 2358 }
2482 2359
2483 void TargetX8632::lowerCast(const InstCast *Inst) { 2360 template <class Machine>
2361 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 2362 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2485 InstCast::OpKind CastKind = Inst->getCastKind(); 2363 InstCast::OpKind CastKind = Inst->getCastKind();
2486 Variable *Dest = Inst->getDest(); 2364 Variable *Dest = Inst->getDest();
2487 switch (CastKind) { 2365 switch (CastKind) {
2488 default: 2366 default:
2489 Func->setError("Cast type not supported"); 2367 Func->setError("Cast type not supported");
2490 return; 2368 return;
2491 case InstCast::Sext: { 2369 case InstCast::Sext: {
2492 // Src0RM is the source operand legalized to physical register or memory, 2370 // Src0RM is the source operand legalized to physical register or memory,
2493 // but not immediate, since the relevant x86 native instructions don't 2371 // but not immediate, since the relevant x86 native instructions don't
2494 // allow an immediate operand. If the operand is an immediate, we could 2372 // allow an immediate operand. If the operand is an immediate, we could
2495 // consider computing the strength-reduced result at translation time, 2373 // consider computing the strength-reduced result at translation time,
2496 // but we're unlikely to see something like that in the bitcode that 2374 // but we're unlikely to see something like that in the bitcode that
2497 // the optimizer wouldn't have already taken care of. 2375 // the optimizer wouldn't have already taken care of.
2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2376 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2499 if (isVectorType(Dest->getType())) { 2377 if (isVectorType(Dest->getType())) {
2500 Type DestTy = Dest->getType(); 2378 Type DestTy = Dest->getType();
2501 if (DestTy == IceType_v16i8) { 2379 if (DestTy == IceType_v16i8) {
2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 2380 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2503 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 2381 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2504 Variable *T = makeReg(DestTy); 2382 Variable *T = makeReg(DestTy);
2505 _movp(T, Src0RM); 2383 _movp(T, Src0RM);
2506 _pand(T, OneMask); 2384 _pand(T, OneMask);
2507 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 2385 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2508 _pcmpgt(T, Zeros); 2386 _pcmpgt(T, Zeros);
2509 _movp(Dest, T); 2387 _movp(Dest, T);
2510 } else { 2388 } else {
2511 // width = width(elty) - 1; dest = (src << width) >> width 2389 // width = width(elty) - 1; dest = (src << width) >> width
2512 SizeT ShiftAmount = 2390 SizeT ShiftAmount =
2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 2391 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2392 1;
2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 2393 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2515 Variable *T = makeReg(DestTy); 2394 Variable *T = makeReg(DestTy);
2516 _movp(T, Src0RM); 2395 _movp(T, Src0RM);
2517 _psll(T, ShiftConstant); 2396 _psll(T, ShiftConstant);
2518 _psra(T, ShiftConstant); 2397 _psra(T, ShiftConstant);
2519 _movp(Dest, T); 2398 _movp(Dest, T);
2520 } 2399 }
2521 } else if (Dest->getType() == IceType_i64) { 2400 } else if (Dest->getType() == IceType_i64) {
2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 2401 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2523 Constant *Shift = Ctx->getConstantInt32(31); 2402 Constant *Shift = Ctx->getConstantInt32(31);
(...skipping 14 matching lines...) Expand all
2538 _mov(T_Hi, T_Lo); 2417 _mov(T_Hi, T_Lo);
2539 if (Src0RM->getType() != IceType_i1) 2418 if (Src0RM->getType() != IceType_i1)
2540 // For i1, the sar instruction is already done above. 2419 // For i1, the sar instruction is already done above.
2541 _sar(T_Hi, Shift); 2420 _sar(T_Hi, Shift);
2542 _mov(DestHi, T_Hi); 2421 _mov(DestHi, T_Hi);
2543 } else if (Src0RM->getType() == IceType_i1) { 2422 } else if (Src0RM->getType() == IceType_i1) {
2544 // t1 = src 2423 // t1 = src
2545 // shl t1, dst_bitwidth - 1 2424 // shl t1, dst_bitwidth - 1
2546 // sar t1, dst_bitwidth - 1 2425 // sar t1, dst_bitwidth - 1
2547 // dst = t1 2426 // dst = t1
2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 2427 size_t DestBits =
2428 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); 2429 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2550 Variable *T = makeReg(Dest->getType()); 2430 Variable *T = makeReg(Dest->getType());
2551 if (typeWidthInBytes(Dest->getType()) <= 2431 if (typeWidthInBytes(Dest->getType()) <=
2552 typeWidthInBytes(Src0RM->getType())) { 2432 typeWidthInBytes(Src0RM->getType())) {
2553 _mov(T, Src0RM); 2433 _mov(T, Src0RM);
2554 } else { 2434 } else {
2555 // Widen the source using movsx or movzx. (It doesn't matter 2435 // Widen the source using movsx or movzx. (It doesn't matter
2556 // which one, since the following shl/sar overwrite the bits.) 2436 // which one, since the following shl/sar overwrite the bits.)
2557 _movzx(T, Src0RM); 2437 _movzx(T, Src0RM);
2558 } 2438 }
(...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after
2943 case IceType_v4i32: 2823 case IceType_v4i32:
2944 case IceType_v4f32: { 2824 case IceType_v4f32: {
2945 _movp(Dest, legalizeToVar(Src0)); 2825 _movp(Dest, legalizeToVar(Src0));
2946 } break; 2826 } break;
2947 } 2827 }
2948 break; 2828 break;
2949 } 2829 }
2950 } 2830 }
2951 } 2831 }
2952 2832
2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { 2833 template <class Machine>
2834 void TargetX86Base<Machine>::lowerExtractElement(
2835 const InstExtractElement *Inst) {
2954 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2836 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2955 ConstantInteger32 *ElementIndex = 2837 ConstantInteger32 *ElementIndex =
2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2838 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2957 // Only constant indices are allowed in PNaCl IR. 2839 // Only constant indices are allowed in PNaCl IR.
2958 assert(ElementIndex); 2840 assert(ElementIndex);
2959 2841
2960 unsigned Index = ElementIndex->getValue(); 2842 unsigned Index = ElementIndex->getValue();
2961 Type Ty = SourceVectNotLegalized->getType(); 2843 Type Ty = SourceVectNotLegalized->getType();
2962 Type ElementTy = typeElementType(Ty); 2844 Type ElementTy = typeElementType(Ty);
2963 Type InVectorElementTy = getInVectorElementType(Ty); 2845 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2964 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2846 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2965 2847
2966 // TODO(wala): Determine the best lowering sequences for each type. 2848 // TODO(wala): Determine the best lowering sequences for each type.
2967 bool CanUsePextr = 2849 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2850 InstructionSet >= Machine::SSE4_1;
2969 if (CanUsePextr && Ty != IceType_v4f32) { 2851 if (CanUsePextr && Ty != IceType_v4f32) {
2970 // Use pextrb, pextrw, or pextrd. 2852 // Use pextrb, pextrw, or pextrd.
2971 Constant *Mask = Ctx->getConstantInt32(Index); 2853 Constant *Mask = Ctx->getConstantInt32(Index);
2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2854 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2973 _pextr(ExtractedElementR, SourceVectR, Mask); 2855 _pextr(ExtractedElementR, SourceVectR, Mask);
2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2856 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2975 // Use pshufd and movd/movss. 2857 // Use pshufd and movd/movss.
2976 Variable *T = nullptr; 2858 Variable *T = nullptr;
2977 if (Index) { 2859 if (Index) {
2978 // The shuffle only needs to occur if the element to be extracted 2860 // The shuffle only needs to occur if the element to be extracted
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); 2901 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
3020 lowerCast(Cast); 2902 lowerCast(Cast);
3021 ExtractedElementR = T; 2903 ExtractedElementR = T;
3022 } 2904 }
3023 2905
3024 // Copy the element to the destination. 2906 // Copy the element to the destination.
3025 Variable *Dest = Inst->getDest(); 2907 Variable *Dest = Inst->getDest();
3026 _mov(Dest, ExtractedElementR); 2908 _mov(Dest, ExtractedElementR);
3027 } 2909 }
3028 2910
3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { 2911 template <class Machine>
2912 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
3030 Operand *Src0 = Inst->getSrc(0); 2913 Operand *Src0 = Inst->getSrc(0);
3031 Operand *Src1 = Inst->getSrc(1); 2914 Operand *Src1 = Inst->getSrc(1);
3032 Variable *Dest = Inst->getDest(); 2915 Variable *Dest = Inst->getDest();
3033 2916
3034 if (isVectorType(Dest->getType())) { 2917 if (isVectorType(Dest->getType())) {
3035 InstFcmp::FCond Condition = Inst->getCondition(); 2918 InstFcmp::FCond Condition = Inst->getCondition();
3036 size_t Index = static_cast<size_t>(Condition); 2919 size_t Index = static_cast<size_t>(Condition);
3037 assert(Index < TableFcmpSize); 2920 assert(Index < Traits::TableFcmpSize);
3038 2921
3039 if (TableFcmp[Index].SwapVectorOperands) { 2922 if (Traits::TableFcmp[Index].SwapVectorOperands) {
3040 Operand *T = Src0; 2923 Operand *T = Src0;
3041 Src0 = Src1; 2924 Src0 = Src1;
3042 Src1 = T; 2925 Src1 = T;
3043 } 2926 }
3044 2927
3045 Variable *T = nullptr; 2928 Variable *T = nullptr;
3046 2929
3047 if (Condition == InstFcmp::True) { 2930 if (Condition == InstFcmp::True) {
3048 // makeVectorOfOnes() requires an integer vector type. 2931 // makeVectorOfOnes() requires an integer vector type.
3049 T = makeVectorOfMinusOnes(IceType_v4i32); 2932 T = makeVectorOfMinusOnes(IceType_v4i32);
3050 } else if (Condition == InstFcmp::False) { 2933 } else if (Condition == InstFcmp::False) {
3051 T = makeVectorOfZeros(Dest->getType()); 2934 T = makeVectorOfZeros(Dest->getType());
3052 } else { 2935 } else {
3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2936 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2937 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3055 if (llvm::isa<OperandX8632Mem>(Src1RM)) 2938 if (llvm::isa<OperandX8632Mem>(Src1RM))
3056 Src1RM = legalizeToVar(Src1RM); 2939 Src1RM = legalizeToVar(Src1RM);
3057 2940
3058 switch (Condition) { 2941 switch (Condition) {
3059 default: { 2942 default: {
3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; 2943 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
3061 assert(Predicate != CondX86::Cmpps_Invalid); 2944 assert(Predicate != CondX86::Cmpps_Invalid);
3062 T = makeReg(Src0RM->getType()); 2945 T = makeReg(Src0RM->getType());
3063 _movp(T, Src0RM); 2946 _movp(T, Src0RM);
3064 _cmpps(T, Src1RM, Predicate); 2947 _cmpps(T, Src1RM, Predicate);
3065 } break; 2948 } break;
3066 case InstFcmp::One: { 2949 case InstFcmp::One: {
3067 // Check both unequal and ordered. 2950 // Check both unequal and ordered.
3068 T = makeReg(Src0RM->getType()); 2951 T = makeReg(Src0RM->getType());
3069 Variable *T2 = makeReg(Src0RM->getType()); 2952 Variable *T2 = makeReg(Src0RM->getType());
3070 _movp(T, Src0RM); 2953 _movp(T, Src0RM);
(...skipping 28 matching lines...) Expand all
3099 // j<C2> label /* only if C2 != Br_None */ 2982 // j<C2> label /* only if C2 != Br_None */
3100 // FakeUse(a) /* only if C1 != Br_None */ 2983 // FakeUse(a) /* only if C1 != Br_None */
3101 // mov a, !<default> /* only if C1 != Br_None */ 2984 // mov a, !<default> /* only if C1 != Br_None */
3102 // label: /* only if C1 != Br_None */ 2985 // label: /* only if C1 != Br_None */
3103 // 2986 //
3104 // setcc lowering when C1 != Br_None && C2 == Br_None: 2987 // setcc lowering when C1 != Br_None && C2 == Br_None:
3105 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ 2988 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
3106 // setcc a, C1 2989 // setcc a, C1
3107 InstFcmp::FCond Condition = Inst->getCondition(); 2990 InstFcmp::FCond Condition = Inst->getCondition();
3108 size_t Index = static_cast<size_t>(Condition); 2991 size_t Index = static_cast<size_t>(Condition);
3109 assert(Index < TableFcmpSize); 2992 assert(Index < Traits::TableFcmpSize);
3110 if (TableFcmp[Index].SwapScalarOperands) 2993 if (Traits::TableFcmp[Index].SwapScalarOperands)
3111 std::swap(Src0, Src1); 2994 std::swap(Src0, Src1);
3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); 2995 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None);
3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); 2996 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None);
3114 if (HasC1) { 2997 if (HasC1) {
3115 Src0 = legalize(Src0); 2998 Src0 = legalize(Src0);
3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2999 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3117 Variable *T = nullptr; 3000 Variable *T = nullptr;
3118 _mov(T, Src0); 3001 _mov(T, Src0);
3119 _ucomiss(T, Src1RM); 3002 _ucomiss(T, Src1RM);
3120 if (!HasC2) { 3003 if (!HasC2) {
3121 assert(TableFcmp[Index].Default); 3004 assert(Traits::TableFcmp[Index].Default);
3122 _setcc(Dest, TableFcmp[Index].C1); 3005 _setcc(Dest, Traits::TableFcmp[Index].C1);
3123 return; 3006 return;
3124 } 3007 }
3125 } 3008 }
3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); 3009 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
3127 _mov(Dest, Default); 3010 _mov(Dest, Default);
3128 if (HasC1) { 3011 if (HasC1) {
3129 InstX8632Label *Label = InstX8632Label::create(Func, this); 3012 InstX8632Label *Label = InstX8632Label::create(Func, this);
3130 _br(TableFcmp[Index].C1, Label); 3013 _br(Traits::TableFcmp[Index].C1, Label);
3131 if (HasC2) { 3014 if (HasC2) {
3132 _br(TableFcmp[Index].C2, Label); 3015 _br(Traits::TableFcmp[Index].C2, Label);
3133 } 3016 }
3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); 3017 Constant *NonDefault =
3018 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
3135 _mov_nonkillable(Dest, NonDefault); 3019 _mov_nonkillable(Dest, NonDefault);
3136 Context.insert(Label); 3020 Context.insert(Label);
3137 } 3021 }
3138 } 3022 }
3139 3023
3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 3024 template <class Machine>
3025 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
3141 Operand *Src0 = legalize(Inst->getSrc(0)); 3026 Operand *Src0 = legalize(Inst->getSrc(0));
3142 Operand *Src1 = legalize(Inst->getSrc(1)); 3027 Operand *Src1 = legalize(Inst->getSrc(1));
3143 Variable *Dest = Inst->getDest(); 3028 Variable *Dest = Inst->getDest();
3144 3029
3145 if (isVectorType(Dest->getType())) { 3030 if (isVectorType(Dest->getType())) {
3146 Type Ty = Src0->getType(); 3031 Type Ty = Src0->getType();
3147 // Promote i1 vectors to 128 bit integer vector types. 3032 // Promote i1 vectors to 128 bit integer vector types.
3148 if (typeElementType(Ty) == IceType_i1) { 3033 if (typeElementType(Ty) == IceType_i1) {
3149 Type NewTy = IceType_NUM; 3034 Type NewTy = IceType_NUM;
3150 switch (Ty) { 3035 switch (Ty) {
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
3248 3133
3249 _movp(Dest, T); 3134 _movp(Dest, T);
3250 eliminateNextVectorSextInstruction(Dest); 3135 eliminateNextVectorSextInstruction(Dest);
3251 return; 3136 return;
3252 } 3137 }
3253 3138
3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 3139 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
3255 if (Src0->getType() == IceType_i64) { 3140 if (Src0->getType() == IceType_i64) {
3256 InstIcmp::ICond Condition = Inst->getCondition(); 3141 InstIcmp::ICond Condition = Inst->getCondition();
3257 size_t Index = static_cast<size_t>(Condition); 3142 size_t Index = static_cast<size_t>(Condition);
3258 assert(Index < TableIcmp64Size); 3143 assert(Index < Traits::TableIcmp64Size);
3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 3144 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 3145 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 3146 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 3147 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3263 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3148 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3264 Constant *One = Ctx->getConstantInt32(1); 3149 Constant *One = Ctx->getConstantInt32(1);
3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 3150 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 3151 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
3267 _mov(Dest, One); 3152 _mov(Dest, One);
3268 _cmp(Src0HiRM, Src1HiRI); 3153 _cmp(Src0HiRM, Src1HiRI);
3269 if (TableIcmp64[Index].C1 != CondX86::Br_None) 3154 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None)
3270 _br(TableIcmp64[Index].C1, LabelTrue); 3155 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
3271 if (TableIcmp64[Index].C2 != CondX86::Br_None) 3156 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None)
3272 _br(TableIcmp64[Index].C2, LabelFalse); 3157 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3273 _cmp(Src0LoRM, Src1LoRI); 3158 _cmp(Src0LoRM, Src1LoRI);
3274 _br(TableIcmp64[Index].C3, LabelTrue); 3159 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3275 Context.insert(LabelFalse); 3160 Context.insert(LabelFalse);
3276 _mov_nonkillable(Dest, Zero); 3161 _mov_nonkillable(Dest, Zero);
3277 Context.insert(LabelTrue); 3162 Context.insert(LabelTrue);
3278 return; 3163 return;
3279 } 3164 }
3280 3165
3281 // cmp b, c 3166 // cmp b, c
3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 3167 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
3283 _cmp(Src0RM, Src1); 3168 _cmp(Src0RM, Src1);
3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition())); 3169 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
3285 } 3170 }
3286 3171
3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 3172 template <class Machine>
3173 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
3288 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3174 Operand *SourceVectNotLegalized = Inst->getSrc(0);
3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3175 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
3290 ConstantInteger32 *ElementIndex = 3176 ConstantInteger32 *ElementIndex =
3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3177 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
3292 // Only constant indices are allowed in PNaCl IR. 3178 // Only constant indices are allowed in PNaCl IR.
3293 assert(ElementIndex); 3179 assert(ElementIndex);
3294 unsigned Index = ElementIndex->getValue(); 3180 unsigned Index = ElementIndex->getValue();
3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 3181 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
3296 3182
3297 Type Ty = SourceVectNotLegalized->getType(); 3183 Type Ty = SourceVectNotLegalized->getType();
3298 Type ElementTy = typeElementType(Ty); 3184 Type ElementTy = typeElementType(Ty);
3299 Type InVectorElementTy = getInVectorElementType(Ty); 3185 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
3300 3186
3301 if (ElementTy == IceType_i1) { 3187 if (ElementTy == IceType_i1) {
3302 // Expand the element to the appropriate size for it to be inserted 3188 // Expand the element to the appropriate size for it to be inserted
3303 // in the vector. 3189 // in the vector.
3304 Variable *Expanded = Func->makeVariable(InVectorElementTy); 3190 Variable *Expanded = Func->makeVariable(InVectorElementTy);
3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 3191 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3306 ElementToInsertNotLegalized); 3192 ElementToInsertNotLegalized);
3307 lowerCast(Cast); 3193 lowerCast(Cast);
3308 ElementToInsertNotLegalized = Expanded; 3194 ElementToInsertNotLegalized = Expanded;
3309 } 3195 }
3310 3196
3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 3197 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
3198 InstructionSet >= Machine::SSE4_1) {
3312 // Use insertps, pinsrb, pinsrw, or pinsrd. 3199 // Use insertps, pinsrb, pinsrw, or pinsrd.
3313 Operand *ElementRM = 3200 Operand *ElementRM =
3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3201 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3315 Operand *SourceVectRM = 3202 Operand *SourceVectRM =
3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3203 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3317 Variable *T = makeReg(Ty); 3204 Variable *T = makeReg(Ty);
3318 _movp(T, SourceVectRM); 3205 _movp(T, SourceVectRM);
3319 if (Ty == IceType_v4f32) 3206 if (Ty == IceType_v4f32)
3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3207 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3321 else 3208 else
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
3400 OperandX8632Mem *Loc = 3287 OperandX8632Mem *Loc =
3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3288 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); 3289 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
3403 3290
3404 Variable *T = makeReg(Ty); 3291 Variable *T = makeReg(Ty);
3405 _movp(T, Slot); 3292 _movp(T, Slot);
3406 _movp(Inst->getDest(), T); 3293 _movp(Inst->getDest(), T);
3407 } 3294 }
3408 } 3295 }
3409 3296
3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 3297 template <class Machine>
3298 void TargetX86Base<Machine>::lowerIntrinsicCall(
3299 const InstIntrinsicCall *Instr) {
3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { 3300 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
3412 case Intrinsics::AtomicCmpxchg: { 3301 case Intrinsics::AtomicCmpxchg: {
3413 if (!Intrinsics::isMemoryOrderValid( 3302 if (!Intrinsics::isMemoryOrderValid(
3414 ID, getConstantMemoryOrder(Instr->getArg(3)), 3303 ID, getConstantMemoryOrder(Instr->getArg(3)),
3415 getConstantMemoryOrder(Instr->getArg(4)))) { 3304 getConstantMemoryOrder(Instr->getArg(4)))) {
3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 3305 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
3417 return; 3306 return;
3418 } 3307 }
3419 Variable *DestPrev = Instr->getDest(); 3308 Variable *DestPrev = Instr->getDest();
3420 Operand *PtrToMem = Instr->getArg(0); 3309 Operand *PtrToMem = Instr->getArg(0);
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
3503 Context.insert( 3392 Context.insert(
3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 3393 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3505 return; 3394 return;
3506 } 3395 }
3507 case Intrinsics::AtomicRMW: 3396 case Intrinsics::AtomicRMW:
3508 if (!Intrinsics::isMemoryOrderValid( 3397 if (!Intrinsics::isMemoryOrderValid(
3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 3398 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3510 Func->setError("Unexpected memory ordering for AtomicRMW"); 3399 Func->setError("Unexpected memory ordering for AtomicRMW");
3511 return; 3400 return;
3512 } 3401 }
3513 lowerAtomicRMW( 3402 lowerAtomicRMW(Instr->getDest(),
3514 Instr->getDest(), 3403 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
3515 static_cast<uint32_t>( 3404 Instr->getArg(0))->getValue()),
3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 3405 Instr->getArg(1), Instr->getArg(2));
3517 Instr->getArg(1), Instr->getArg(2));
3518 return; 3406 return;
3519 case Intrinsics::AtomicStore: { 3407 case Intrinsics::AtomicStore: {
3520 if (!Intrinsics::isMemoryOrderValid( 3408 if (!Intrinsics::isMemoryOrderValid(
3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 3409 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3522 Func->setError("Unexpected memory ordering for AtomicStore"); 3410 Func->setError("Unexpected memory ordering for AtomicStore");
3523 return; 3411 return;
3524 } 3412 }
3525 // We require the memory address to be naturally aligned. 3413 // We require the memory address to be naturally aligned.
3526 // Given that is the case, then normal stores are atomic. 3414 // Given that is the case, then normal stores are atomic.
3527 // Add a fence after the store to make it visible. 3415 // Add a fence after the store to make it visible.
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
3733 case Intrinsics::Trap: 3621 case Intrinsics::Trap:
3734 _ud2(); 3622 _ud2();
3735 return; 3623 return;
3736 case Intrinsics::UnknownIntrinsic: 3624 case Intrinsics::UnknownIntrinsic:
3737 Func->setError("Should not be lowering UnknownIntrinsic"); 3625 Func->setError("Should not be lowering UnknownIntrinsic");
3738 return; 3626 return;
3739 } 3627 }
3740 return; 3628 return;
3741 } 3629 }
3742 3630
3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, 3631 template <class Machine>
3744 Operand *Expected, Operand *Desired) { 3632 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3633 Operand *Ptr, Operand *Expected,
3634 Operand *Desired) {
3745 if (Expected->getType() == IceType_i64) { 3635 if (Expected->getType() == IceType_i64) {
3746 // Reserve the pre-colored registers first, before adding any more 3636 // Reserve the pre-colored registers first, before adding any more
3747 // infinite-weight variables from formMemoryOperand's legalization. 3637 // infinite-weight variables from formMemoryOperand's legalization.
3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 3638 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); 3639 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 3640 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); 3641 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
3752 _mov(T_eax, loOperand(Expected)); 3642 _mov(T_eax, loOperand(Expected));
3753 _mov(T_edx, hiOperand(Expected)); 3643 _mov(T_edx, hiOperand(Expected));
3754 _mov(T_ebx, loOperand(Desired)); 3644 _mov(T_ebx, loOperand(Desired));
3755 _mov(T_ecx, hiOperand(Desired)); 3645 _mov(T_ecx, hiOperand(Desired));
3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3646 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3757 const bool Locked = true; 3647 const bool Locked = true;
3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3648 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3649 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3650 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3761 _mov(DestLo, T_eax); 3651 _mov(DestLo, T_eax);
3762 _mov(DestHi, T_edx); 3652 _mov(DestHi, T_edx);
3763 return; 3653 return;
3764 } 3654 }
3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); 3655 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
3766 _mov(T_eax, Expected); 3656 _mov(T_eax, Expected);
3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3657 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3768 Variable *DesiredReg = legalizeToVar(Desired); 3658 Variable *DesiredReg = legalizeToVar(Desired);
3769 const bool Locked = true; 3659 const bool Locked = true;
3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3660 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3771 _mov(DestPrev, T_eax); 3661 _mov(DestPrev, T_eax);
3772 } 3662 }
3773 3663
3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, 3664 template <class Machine>
3775 Operand *Expected, 3665 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3776 Operand *Desired) { 3666 Operand *PtrToMem,
3667 Operand *Expected,
3668 Operand *Desired) {
3777 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3669 if (Ctx->getFlags().getOptLevel() == Opt_m1)
3778 return false; 3670 return false;
3779 // Peek ahead a few instructions and see how Dest is used. 3671 // Peek ahead a few instructions and see how Dest is used.
3780 // It's very common to have: 3672 // It's very common to have:
3781 // 3673 //
3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) 3674 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
3783 // [%y_phi = ...] // list of phi stores 3675 // [%y_phi = ...] // list of phi stores
3784 // %p = icmp eq i32 %x, %expected 3676 // %p = icmp eq i32 %x, %expected
3785 // br i1 %p, label %l1, label %l2 3677 // br i1 %p, label %l1, label %l2
3786 // 3678 //
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
3837 NextBr->setDeleted(); 3729 NextBr->setDeleted();
3838 Context.advanceNext(); 3730 Context.advanceNext();
3839 Context.advanceNext(); 3731 Context.advanceNext();
3840 return true; 3732 return true;
3841 } 3733 }
3842 } 3734 }
3843 } 3735 }
3844 return false; 3736 return false;
3845 } 3737 }
3846 3738
3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3739 template <class Machine>
3848 Operand *Ptr, Operand *Val) { 3740 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3741 Operand *Ptr, Operand *Val) {
3849 bool NeedsCmpxchg = false; 3742 bool NeedsCmpxchg = false;
3850 LowerBinOp Op_Lo = nullptr; 3743 LowerBinOp Op_Lo = nullptr;
3851 LowerBinOp Op_Hi = nullptr; 3744 LowerBinOp Op_Hi = nullptr;
3852 switch (Operation) { 3745 switch (Operation) {
3853 default: 3746 default:
3854 Func->setError("Unknown AtomicRMW operation"); 3747 Func->setError("Unknown AtomicRMW operation");
3855 return; 3748 return;
3856 case Intrinsics::AtomicAdd: { 3749 case Intrinsics::AtomicAdd: {
3857 if (Dest->getType() == IceType_i64) { 3750 if (Dest->getType() == IceType_i64) {
3858 // All the fall-through paths must set this to true, but use this 3751 // All the fall-through paths must set this to true, but use this
3859 // for asserting. 3752 // for asserting.
3860 NeedsCmpxchg = true; 3753 NeedsCmpxchg = true;
3861 Op_Lo = &TargetX8632::_add; 3754 Op_Lo = &TargetX86Base<Machine>::_add;
3862 Op_Hi = &TargetX8632::_adc; 3755 Op_Hi = &TargetX86Base<Machine>::_adc;
3863 break; 3756 break;
3864 } 3757 }
3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3758 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3866 const bool Locked = true; 3759 const bool Locked = true;
3867 Variable *T = nullptr; 3760 Variable *T = nullptr;
3868 _mov(T, Val); 3761 _mov(T, Val);
3869 _xadd(Addr, T, Locked); 3762 _xadd(Addr, T, Locked);
3870 _mov(Dest, T); 3763 _mov(Dest, T);
3871 return; 3764 return;
3872 } 3765 }
3873 case Intrinsics::AtomicSub: { 3766 case Intrinsics::AtomicSub: {
3874 if (Dest->getType() == IceType_i64) { 3767 if (Dest->getType() == IceType_i64) {
3875 NeedsCmpxchg = true; 3768 NeedsCmpxchg = true;
3876 Op_Lo = &TargetX8632::_sub; 3769 Op_Lo = &TargetX86Base<Machine>::_sub;
3877 Op_Hi = &TargetX8632::_sbb; 3770 Op_Hi = &TargetX86Base<Machine>::_sbb;
3878 break; 3771 break;
3879 } 3772 }
3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3773 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3881 const bool Locked = true; 3774 const bool Locked = true;
3882 Variable *T = nullptr; 3775 Variable *T = nullptr;
3883 _mov(T, Val); 3776 _mov(T, Val);
3884 _neg(T); 3777 _neg(T);
3885 _xadd(Addr, T, Locked); 3778 _xadd(Addr, T, Locked);
3886 _mov(Dest, T); 3779 _mov(Dest, T);
3887 return; 3780 return;
3888 } 3781 }
3889 case Intrinsics::AtomicOr: 3782 case Intrinsics::AtomicOr:
3890 // TODO(jvoung): If Dest is null or dead, then some of these 3783 // TODO(jvoung): If Dest is null or dead, then some of these
3891 // operations do not need an "exchange", but just a locked op. 3784 // operations do not need an "exchange", but just a locked op.
3892 // That appears to be "worth" it for sub, or, and, and xor. 3785 // That appears to be "worth" it for sub, or, and, and xor.
3893 // xadd is probably fine vs lock add for add, and xchg is fine 3786 // xadd is probably fine vs lock add for add, and xchg is fine
3894 // vs an atomic store. 3787 // vs an atomic store.
3895 NeedsCmpxchg = true; 3788 NeedsCmpxchg = true;
3896 Op_Lo = &TargetX8632::_or; 3789 Op_Lo = &TargetX86Base<Machine>::_or;
3897 Op_Hi = &TargetX8632::_or; 3790 Op_Hi = &TargetX86Base<Machine>::_or;
3898 break; 3791 break;
3899 case Intrinsics::AtomicAnd: 3792 case Intrinsics::AtomicAnd:
3900 NeedsCmpxchg = true; 3793 NeedsCmpxchg = true;
3901 Op_Lo = &TargetX8632::_and; 3794 Op_Lo = &TargetX86Base<Machine>::_and;
3902 Op_Hi = &TargetX8632::_and; 3795 Op_Hi = &TargetX86Base<Machine>::_and;
3903 break; 3796 break;
3904 case Intrinsics::AtomicXor: 3797 case Intrinsics::AtomicXor:
3905 NeedsCmpxchg = true; 3798 NeedsCmpxchg = true;
3906 Op_Lo = &TargetX8632::_xor; 3799 Op_Lo = &TargetX86Base<Machine>::_xor;
3907 Op_Hi = &TargetX8632::_xor; 3800 Op_Hi = &TargetX86Base<Machine>::_xor;
3908 break; 3801 break;
3909 case Intrinsics::AtomicExchange: 3802 case Intrinsics::AtomicExchange:
3910 if (Dest->getType() == IceType_i64) { 3803 if (Dest->getType() == IceType_i64) {
3911 NeedsCmpxchg = true; 3804 NeedsCmpxchg = true;
3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3805 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3913 // just need to be moved to the ecx and ebx registers. 3806 // just need to be moved to the ecx and ebx registers.
3914 Op_Lo = nullptr; 3807 Op_Lo = nullptr;
3915 Op_Hi = nullptr; 3808 Op_Hi = nullptr;
3916 break; 3809 break;
3917 } 3810 }
3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3811 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
3919 Variable *T = nullptr; 3812 Variable *T = nullptr;
3920 _mov(T, Val); 3813 _mov(T, Val);
3921 _xchg(Addr, T); 3814 _xchg(Addr, T);
3922 _mov(Dest, T); 3815 _mov(Dest, T);
3923 return; 3816 return;
3924 } 3817 }
3925 // Otherwise, we need a cmpxchg loop. 3818 // Otherwise, we need a cmpxchg loop.
3926 (void)NeedsCmpxchg; 3819 (void)NeedsCmpxchg;
3927 assert(NeedsCmpxchg); 3820 assert(NeedsCmpxchg);
3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); 3821 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
3929 } 3822 }
3930 3823
3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, 3824 template <class Machine>
3932 Variable *Dest, Operand *Ptr, 3825 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
3933 Operand *Val) { 3826 LowerBinOp Op_Hi,
3827 Variable *Dest,
3828 Operand *Ptr,
3829 Operand *Val) {
3934 // Expand a more complex RMW operation as a cmpxchg loop: 3830 // Expand a more complex RMW operation as a cmpxchg loop:
3935 // For 64-bit: 3831 // For 64-bit:
3936 // mov eax, [ptr] 3832 // mov eax, [ptr]
3937 // mov edx, [ptr + 4] 3833 // mov edx, [ptr + 4]
3938 // .LABEL: 3834 // .LABEL:
3939 // mov ebx, eax 3835 // mov ebx, eax
3940 // <Op_Lo> ebx, <desired_adj_lo> 3836 // <Op_Lo> ebx, <desired_adj_lo>
3941 // mov ecx, edx 3837 // mov ecx, edx
3942 // <Op_Hi> ecx, <desired_adj_hi> 3838 // <Op_Hi> ecx, <desired_adj_hi>
3943 // lock cmpxchg8b [ptr] 3839 // lock cmpxchg8b [ptr]
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
4028 // The address base (if any) is also reused in the loop. 3924 // The address base (if any) is also reused in the loop.
4029 if (Variable *Base = Addr->getBase()) 3925 if (Variable *Base = Addr->getBase())
4030 Context.insert(InstFakeUse::create(Func, Base)); 3926 Context.insert(InstFakeUse::create(Func, Base));
4031 _mov(Dest, T_eax); 3927 _mov(Dest, T_eax);
4032 } 3928 }
4033 3929
4034 // Lowers count {trailing, leading} zeros intrinsic. 3930 // Lowers count {trailing, leading} zeros intrinsic.
4035 // 3931 //
4036 // We could do constant folding here, but that should have 3932 // We could do constant folding here, but that should have
4037 // been done by the front-end/middle-end optimizations. 3933 // been done by the front-end/middle-end optimizations.
4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, 3934 template <class Machine>
4039 Operand *FirstVal, Operand *SecondVal) { 3935 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3936 Operand *FirstVal,
3937 Operand *SecondVal) {
4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). 3938 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
4041 // Then the instructions will handle the Val == 0 case much more simply 3939 // Then the instructions will handle the Val == 0 case much more simply
4042 // and won't require conversion from bit position to number of zeros. 3940 // and won't require conversion from bit position to number of zeros.
4043 // 3941 //
4044 // Otherwise: 3942 // Otherwise:
4045 // bsr IF_NOT_ZERO, Val 3943 // bsr IF_NOT_ZERO, Val
4046 // mov T_DEST, 63 3944 // mov T_DEST, 63
4047 // cmovne T_DEST, IF_NOT_ZERO 3945 // cmovne T_DEST, IF_NOT_ZERO
4048 // xor T_DEST, 31 3946 // xor T_DEST, 31
4049 // mov DEST, T_DEST 3947 // mov DEST, T_DEST
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
4100 } else { 3998 } else {
4101 _bsr(T_Dest2, SecondVar); 3999 _bsr(T_Dest2, SecondVar);
4102 _xor(T_Dest2, ThirtyOne); 4000 _xor(T_Dest2, ThirtyOne);
4103 } 4001 }
4104 _test(SecondVar, SecondVar); 4002 _test(SecondVar, SecondVar);
4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e); 4003 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
4106 _mov(DestLo, T_Dest2); 4004 _mov(DestLo, T_Dest2);
4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 4005 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
4108 } 4006 }
4109 4007
4110 namespace {
4111
4112 bool isAdd(const Inst *Inst) { 4008 bool isAdd(const Inst *Inst) {
4113 if (const InstArithmetic *Arith = 4009 if (const InstArithmetic *Arith =
4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 4010 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
4115 return (Arith->getOp() == InstArithmetic::Add); 4011 return (Arith->getOp() == InstArithmetic::Add);
4116 } 4012 }
4117 return false; 4013 return false;
4118 } 4014 }
4119 4015
4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base, 4016 void dumpAddressOpt(const Cfg *Func, const Variable *Base,
4121 const Variable *Index, uint16_t Shift, int32_t Offset, 4017 const Variable *Index, uint16_t Shift, int32_t Offset,
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
4342 // set Index=Var, Offset+=(Const<<Shift) 4238 // set Index=Var, Offset+=(Const<<Shift)
4343 4239
4344 // Index is Index=Var-Const ==> 4240 // Index is Index=Var-Const ==>
4345 // set Index=Var, Offset-=(Const<<Shift) 4241 // set Index=Var, Offset-=(Const<<Shift)
4346 4242
4347 // TODO: consider overflow issues with respect to Offset. 4243 // TODO: consider overflow issues with respect to Offset.
4348 // TODO: handle symbolic constants. 4244 // TODO: handle symbolic constants.
4349 } 4245 }
4350 } 4246 }
4351 4247
4352 } // anonymous namespace 4248 template <class Machine>
4353 4249 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
4354 void TargetX8632::lowerLoad(const InstLoad *Load) {
4355 // A Load instruction can be treated the same as an Assign 4250 // A Load instruction can be treated the same as an Assign
4356 // instruction, after the source operand is transformed into an 4251 // instruction, after the source operand is transformed into an
4357 // OperandX8632Mem operand. Note that the address mode 4252 // OperandX8632Mem operand. Note that the address mode
4358 // optimization already creates an OperandX8632Mem operand, so it 4253 // optimization already creates an OperandX8632Mem operand, so it
4359 // doesn't need another level of transformation. 4254 // doesn't need another level of transformation.
4360 Variable *DestLoad = Load->getDest(); 4255 Variable *DestLoad = Load->getDest();
4361 Type Ty = DestLoad->getType(); 4256 Type Ty = DestLoad->getType();
4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4257 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4258 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
4364 lowerAssign(Assign); 4259 lowerAssign(Assign);
4365 } 4260 }
4366 4261
4367 void TargetX8632::doAddressOptLoad() { 4262 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4368 Inst *Inst = Context.getCur(); 4263 Inst *Inst = Context.getCur();
4369 Variable *Dest = Inst->getDest(); 4264 Variable *Dest = Inst->getDest();
4370 Operand *Addr = Inst->getSrc(0); 4265 Operand *Addr = Inst->getSrc(0);
4371 Variable *Index = nullptr; 4266 Variable *Index = nullptr;
4372 uint16_t Shift = 0; 4267 uint16_t Shift = 0;
4373 int32_t Offset = 0; // TODO: make Constant 4268 int32_t Offset = 0; // TODO: make Constant
4374 // Vanilla ICE load instructions should not use the segment registers, 4269 // Vanilla ICE load instructions should not use the segment registers,
4375 // and computeAddressOpt only works at the level of Variables and Constants, 4270 // and computeAddressOpt only works at the level of Variables and Constants,
4376 // not other OperandX8632Mem, so there should be no mention of segment 4271 // not other OperandX8632Mem, so there should be no mention of segment
4377 // registers there either. 4272 // registers there either.
4378 const OperandX8632Mem::SegmentRegisters SegmentReg = 4273 const OperandX8632Mem::SegmentRegisters SegmentReg =
4379 OperandX8632Mem::DefaultSegment; 4274 OperandX8632Mem::DefaultSegment;
4380 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4275 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4276 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4382 if (Base && Addr != Base) { 4277 if (Base && Addr != Base) {
4383 Inst->setDeleted(); 4278 Inst->setDeleted();
4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4279 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 4280 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
4386 Shift, SegmentReg); 4281 Shift, SegmentReg);
4387 Context.insert(InstLoad::create(Func, Dest, Addr)); 4282 Context.insert(InstLoad::create(Func, Dest, Addr));
4388 } 4283 }
4389 } 4284 }
4390 4285
4391 void TargetX8632::randomlyInsertNop(float Probability) { 4286 template <class Machine>
4287 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 4288 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4393 if (RNG.getTrueWithProbability(Probability)) { 4289 if (RNG.getTrueWithProbability(Probability)) {
4394 _nop(RNG(X86_NUM_NOP_VARIANTS)); 4290 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
4395 } 4291 }
4396 } 4292 }
4397 4293
4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { 4294 template <class Machine>
4295 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4399 Func->setError("Phi found in regular instruction list"); 4296 Func->setError("Phi found in regular instruction list");
4400 } 4297 }
4401 4298
4402 void TargetX8632::lowerRet(const InstRet *Inst) { 4299 template <class Machine>
4300 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
4403 Variable *Reg = nullptr; 4301 Variable *Reg = nullptr;
4404 if (Inst->hasRetValue()) { 4302 if (Inst->hasRetValue()) {
4405 Operand *Src0 = legalize(Inst->getRetValue()); 4303 Operand *Src0 = legalize(Inst->getRetValue());
4406 if (Src0->getType() == IceType_i64) { 4304 if (Src0->getType() == IceType_i64) {
4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); 4305 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); 4306 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
4409 Reg = eax; 4307 Reg = eax;
4410 Context.insert(InstFakeUse::create(Func, edx)); 4308 Context.insert(InstFakeUse::create(Func, edx));
4411 } else if (isScalarFloatingType(Src0->getType())) { 4309 } else if (isScalarFloatingType(Src0->getType())) {
4412 _fld(Src0); 4310 _fld(Src0);
4413 } else if (isVectorType(Src0->getType())) { 4311 } else if (isVectorType(Src0->getType())) {
4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); 4312 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
4415 } else { 4313 } else {
4416 _mov(Reg, Src0, RegX8632::Reg_eax); 4314 _mov(Reg, Src0, RegX8632::Reg_eax);
4417 } 4315 }
4418 } 4316 }
4419 // Add a ret instruction even if sandboxing is enabled, because 4317 // Add a ret instruction even if sandboxing is enabled, because
4420 // addEpilog explicitly looks for a ret instruction as a marker for 4318 // addEpilog explicitly looks for a ret instruction as a marker for
4421 // where to insert the frame removal instructions. 4319 // where to insert the frame removal instructions.
4422 _ret(Reg); 4320 _ret(Reg);
4423 // Add a fake use of esp to make sure esp stays alive for the entire 4321 // Add a fake use of esp to make sure esp stays alive for the entire
4424 // function. Otherwise post-call esp adjustments get dead-code 4322 // function. Otherwise post-call esp adjustments get dead-code
4425 // eliminated. TODO: Are there more places where the fake use 4323 // eliminated. TODO: Are there more places where the fake use
4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 4324 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
4427 // have a ret instruction. 4325 // have a ret instruction.
4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 4326 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
4429 Context.insert(InstFakeUse::create(Func, esp)); 4327 Context.insert(InstFakeUse::create(Func, esp));
4430 } 4328 }
4431 4329
4432 void TargetX8632::lowerSelect(const InstSelect *Inst) { 4330 template <class Machine>
4331 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
4433 Variable *Dest = Inst->getDest(); 4332 Variable *Dest = Inst->getDest();
4434 Type DestTy = Dest->getType(); 4333 Type DestTy = Dest->getType();
4435 Operand *SrcT = Inst->getTrueOperand(); 4334 Operand *SrcT = Inst->getTrueOperand();
4436 Operand *SrcF = Inst->getFalseOperand(); 4335 Operand *SrcF = Inst->getFalseOperand();
4437 Operand *Condition = Inst->getCondition(); 4336 Operand *Condition = Inst->getCondition();
4438 4337
4439 if (isVectorType(DestTy)) { 4338 if (isVectorType(DestTy)) {
4440 Type SrcTy = SrcT->getType(); 4339 Type SrcTy = SrcT->getType();
4441 Variable *T = makeReg(SrcTy); 4340 Variable *T = makeReg(SrcTy);
4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 4341 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 4342 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4444 if (InstructionSet >= SSE4_1) { 4343 if (InstructionSet >= Machine::SSE4_1) {
4445 // TODO(wala): If the condition operand is a constant, use blendps 4344 // TODO(wala): If the condition operand is a constant, use blendps
4446 // or pblendw. 4345 // or pblendw.
4447 // 4346 //
4448 // Use blendvps or pblendvb to implement select. 4347 // Use blendvps or pblendvb to implement select.
4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 4348 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4450 SrcTy == IceType_v4f32) { 4349 SrcTy == IceType_v4f32) {
4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4350 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 4351 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
4453 _movp(xmm0, ConditionRM); 4352 _movp(xmm0, ConditionRM);
4454 _psll(xmm0, Ctx->getConstantInt8(31)); 4353 _psll(xmm0, Ctx->getConstantInt8(31));
4455 _movp(T, SrcFRM); 4354 _movp(T, SrcFRM);
4456 _blendvps(T, SrcTRM, xmm0); 4355 _blendvps(T, SrcTRM, xmm0);
4457 _movp(Dest, T); 4356 _movp(Dest, T);
4458 } else { 4357 } else {
4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 4358 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 4359 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4461 : IceType_v16i8; 4360 : IceType_v16i8;
4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 4361 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 4362 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4464 _movp(T, SrcFRM); 4363 _movp(T, SrcFRM);
4465 _pblendvb(T, SrcTRM, xmm0); 4364 _pblendvb(T, SrcTRM, xmm0);
4466 _movp(Dest, T); 4365 _movp(Dest, T);
4467 } 4366 }
4468 return; 4367 return;
4469 } 4368 }
4470 // Lower select without SSE4.1: 4369 // Lower select without Machine::SSE4.1:
4471 // a=d?b:c ==> 4370 // a=d?b:c ==>
4472 // if elementtype(d) != i1: 4371 // if elementtype(d) != i1:
4473 // d=sext(d); 4372 // d=sext(d);
4474 // a=(b&d)|(c&~d); 4373 // a=(b&d)|(c&~d);
4475 Variable *T2 = makeReg(SrcTy); 4374 Variable *T2 = makeReg(SrcTy);
4476 // Sign extend the condition operand if applicable. 4375 // Sign extend the condition operand if applicable.
4477 if (SrcTy == IceType_v4f32) { 4376 if (SrcTy == IceType_v4f32) {
4478 // The sext operation takes only integer arguments. 4377 // The sext operation takes only integer arguments.
4479 Variable *T3 = Func->makeVariable(IceType_v4i32); 4378 Variable *T3 = Func->makeVariable(IceType_v4i32);
4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 4379 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
(...skipping 17 matching lines...) Expand all
4498 Operand *CmpOpnd0 = nullptr; 4397 Operand *CmpOpnd0 = nullptr;
4499 Operand *CmpOpnd1 = nullptr; 4398 Operand *CmpOpnd1 = nullptr;
4500 // Handle folding opportunities. 4399 // Handle folding opportunities.
4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4400 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4502 assert(Producer->isDeleted()); 4401 assert(Producer->isDeleted());
4503 switch (BoolFolding::getProducerKind(Producer)) { 4402 switch (BoolFolding::getProducerKind(Producer)) {
4504 default: 4403 default:
4505 break; 4404 break;
4506 case BoolFolding::PK_Icmp32: { 4405 case BoolFolding::PK_Icmp32: {
4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4406 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4508 Cond = getIcmp32Mapping(Cmp->getCondition()); 4407 Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
4509 CmpOpnd1 = legalize(Producer->getSrc(1)); 4408 CmpOpnd1 = legalize(Producer->getSrc(1));
4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); 4409 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
4511 } break; 4410 } break;
4512 } 4411 }
4513 } 4412 }
4514 if (CmpOpnd0 == nullptr) { 4413 if (CmpOpnd0 == nullptr) {
4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); 4414 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 4415 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4517 } 4416 }
4518 assert(CmpOpnd0); 4417 assert(CmpOpnd0);
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
4562 4461
4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4462 assert(DestTy == IceType_i16 || DestTy == IceType_i32);
4564 Variable *T = nullptr; 4463 Variable *T = nullptr;
4565 SrcF = legalize(SrcF); 4464 SrcF = legalize(SrcF);
4566 _mov(T, SrcF); 4465 _mov(T, SrcF);
4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4466 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4568 _cmov(T, SrcT, Cond); 4467 _cmov(T, SrcT, Cond);
4569 _mov(Dest, T); 4468 _mov(Dest, T);
4570 } 4469 }
4571 4470
4572 void TargetX8632::lowerStore(const InstStore *Inst) { 4471 template <class Machine>
4472 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4573 Operand *Value = Inst->getData(); 4473 Operand *Value = Inst->getData();
4574 Operand *Addr = Inst->getAddr(); 4474 Operand *Addr = Inst->getAddr();
4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 4475 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
4576 Type Ty = NewAddr->getType(); 4476 Type Ty = NewAddr->getType();
4577 4477
4578 if (Ty == IceType_i64) { 4478 if (Ty == IceType_i64) {
4579 Value = legalize(Value); 4479 Value = legalize(Value);
4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4480 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4481 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); 4482 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); 4483 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
4584 } else if (isVectorType(Ty)) { 4484 } else if (isVectorType(Ty)) {
4585 _storep(legalizeToVar(Value), NewAddr); 4485 _storep(legalizeToVar(Value), NewAddr);
4586 } else { 4486 } else {
4587 Value = legalize(Value, Legal_Reg | Legal_Imm); 4487 Value = legalize(Value, Legal_Reg | Legal_Imm);
4588 _store(Value, NewAddr); 4488 _store(Value, NewAddr);
4589 } 4489 }
4590 } 4490 }
4591 4491
4592 void TargetX8632::doAddressOptStore() { 4492 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4493 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4594 Operand *Data = Inst->getData(); 4494 Operand *Data = Inst->getData();
4595 Operand *Addr = Inst->getAddr(); 4495 Operand *Addr = Inst->getAddr();
4596 Variable *Index = nullptr; 4496 Variable *Index = nullptr;
4597 uint16_t Shift = 0; 4497 uint16_t Shift = 0;
4598 int32_t Offset = 0; // TODO: make Constant 4498 int32_t Offset = 0; // TODO: make Constant
4599 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4499 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4600 // Vanilla ICE store instructions should not use the segment registers, 4500 // Vanilla ICE store instructions should not use the segment registers,
4601 // and computeAddressOpt only works at the level of Variables and Constants, 4501 // and computeAddressOpt only works at the level of Variables and Constants,
4602 // not other OperandX8632Mem, so there should be no mention of segment 4502 // not other OperandX8632Mem, so there should be no mention of segment
4603 // registers there either. 4503 // registers there either.
4604 const OperandX8632Mem::SegmentRegisters SegmentReg = 4504 const OperandX8632Mem::SegmentRegisters SegmentReg =
4605 OperandX8632Mem::DefaultSegment; 4505 OperandX8632Mem::DefaultSegment;
4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4506 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4607 if (Base && Addr != Base) { 4507 if (Base && Addr != Base) {
4608 Inst->setDeleted(); 4508 Inst->setDeleted();
4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4509 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4510 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4611 Shift, SegmentReg); 4511 Shift, SegmentReg);
4612 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4512 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4613 if (Inst->getDest()) 4513 if (Inst->getDest())
4614 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4514 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4615 Context.insert(NewStore); 4515 Context.insert(NewStore);
4616 } 4516 }
4617 } 4517 }
4618 4518
4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4519 template <class Machine>
4520 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4620 // This implements the most naive possible lowering. 4521 // This implements the most naive possible lowering.
4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4522 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4622 Operand *Src0 = Inst->getComparison(); 4523 Operand *Src0 = Inst->getComparison();
4623 SizeT NumCases = Inst->getNumCases(); 4524 SizeT NumCases = Inst->getNumCases();
4624 if (Src0->getType() == IceType_i64) { 4525 if (Src0->getType() == IceType_i64) {
4625 Src0 = legalize(Src0); // get Base/Index into physical registers 4526 Src0 = legalize(Src0); // get Base/Index into physical registers
4626 Operand *Src0Lo = loOperand(Src0); 4527 Operand *Src0Lo = loOperand(Src0);
4627 Operand *Src0Hi = hiOperand(Src0); 4528 Operand *Src0Hi = hiOperand(Src0);
4628 if (NumCases >= 2) { 4529 if (NumCases >= 2) {
4629 Src0Lo = legalizeToVar(Src0Lo); 4530 Src0Lo = legalizeToVar(Src0Lo);
(...skipping 23 matching lines...) Expand all
4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4554 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4654 for (SizeT I = 0; I < NumCases; ++I) { 4555 for (SizeT I = 0; I < NumCases; ++I) {
4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); 4556 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4656 _cmp(Src0, Value); 4557 _cmp(Src0, Value);
4657 _br(CondX86::Br_e, Inst->getLabel(I)); 4558 _br(CondX86::Br_e, Inst->getLabel(I));
4658 } 4559 }
4659 4560
4660 _br(Inst->getLabelDefault()); 4561 _br(Inst->getLabelDefault());
4661 } 4562 }
4662 4563
4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4564 template <class Machine>
4664 Variable *Dest, Operand *Src0, 4565 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4665 Operand *Src1) { 4566 Variable *Dest, Operand *Src0,
4567 Operand *Src1) {
4666 assert(isVectorType(Dest->getType())); 4568 assert(isVectorType(Dest->getType()));
4667 Type Ty = Dest->getType(); 4569 Type Ty = Dest->getType();
4668 Type ElementTy = typeElementType(Ty); 4570 Type ElementTy = typeElementType(Ty);
4669 SizeT NumElements = typeNumElements(Ty); 4571 SizeT NumElements = typeNumElements(Ty);
4670 4572
4671 Operand *T = Ctx->getConstantUndef(Ty); 4573 Operand *T = Ctx->getConstantUndef(Ty);
4672 for (SizeT I = 0; I < NumElements; ++I) { 4574 for (SizeT I = 0; I < NumElements; ++I) {
4673 Constant *Index = Ctx->getConstantInt32(I); 4575 Constant *Index = Ctx->getConstantInt32(I);
4674 4576
4675 // Extract the next two inputs. 4577 // Extract the next two inputs.
(...skipping 16 matching lines...) Expand all
4692 } 4594 }
4693 4595
4694 // The following pattern occurs often in lowered C and C++ code: 4596 // The following pattern occurs often in lowered C and C++ code:
4695 // 4597 //
4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4598 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4599 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
4698 // 4600 //
4699 // We can eliminate the sext operation by copying the result of pcmpeqd, 4601 // We can eliminate the sext operation by copying the result of pcmpeqd,
4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result 4602 // pcmpgtd, or cmpps (which produce sign extended results) to the result
4701 // of the sext operation. 4603 // of the sext operation.
4702 void TargetX8632::eliminateNextVectorSextInstruction( 4604 template <class Machine>
4605 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
4703 Variable *SignExtendedResult) { 4606 Variable *SignExtendedResult) {
4704 if (InstCast *NextCast = 4607 if (InstCast *NextCast =
4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 4608 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4706 if (NextCast->getCastKind() == InstCast::Sext && 4609 if (NextCast->getCastKind() == InstCast::Sext &&
4707 NextCast->getSrc(0) == SignExtendedResult) { 4610 NextCast->getSrc(0) == SignExtendedResult) {
4708 NextCast->setDeleted(); 4611 NextCast->setDeleted();
4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4612 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4710 // Skip over the instruction. 4613 // Skip over the instruction.
4711 Context.advanceNext(); 4614 Context.advanceNext();
4712 } 4615 }
4713 } 4616 }
4714 } 4617 }
4715 4618
4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } 4619 template <class Machine>
4620 void TargetX86Base<Machine>::lowerUnreachable(
4621 const InstUnreachable * /*Inst*/) {
4622 _ud2();
4623 }
4717 4624
4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { 4625 template <class Machine>
4626 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) {
4719 // If the beacon variable's live range does not end in this 4627 // If the beacon variable's live range does not end in this
4720 // instruction, then it must end in the modified Store instruction 4628 // instruction, then it must end in the modified Store instruction
4721 // that follows. This means that the original Store instruction is 4629 // that follows. This means that the original Store instruction is
4722 // still there, either because the value being stored is used beyond 4630 // still there, either because the value being stored is used beyond
4723 // the Store instruction, or because dead code elimination did not 4631 // the Store instruction, or because dead code elimination did not
4724 // happen. In either case, we cancel RMW lowering (and the caller 4632 // happen. In either case, we cancel RMW lowering (and the caller
4725 // deletes the RMW instruction). 4633 // deletes the RMW instruction).
4726 if (!RMW->isLastUse(RMW->getBeacon())) 4634 if (!RMW->isLastUse(RMW->getBeacon()))
4727 return; 4635 return;
4728 Operand *Src = RMW->getData(); 4636 Operand *Src = RMW->getData();
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
4782 return; 4690 return;
4783 case InstArithmetic::Xor: 4691 case InstArithmetic::Xor:
4784 Src = legalize(Src, Legal_Reg | Legal_Imm); 4692 Src = legalize(Src, Legal_Reg | Legal_Imm);
4785 _xor_rmw(Addr, Src); 4693 _xor_rmw(Addr, Src);
4786 return; 4694 return;
4787 } 4695 }
4788 } 4696 }
4789 llvm::report_fatal_error("Couldn't lower RMW instruction"); 4697 llvm::report_fatal_error("Couldn't lower RMW instruction");
4790 } 4698 }
4791 4699
4792 void TargetX8632::lowerOther(const Inst *Instr) { 4700 template <class Machine>
4701 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4702 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4794 lowerRMW(RMW); 4703 lowerRMW(RMW);
4795 } else { 4704 } else {
4796 TargetLowering::lowerOther(Instr); 4705 TargetLowering::lowerOther(Instr);
4797 } 4706 }
4798 } 4707 }
4799 4708
4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4709 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4801 // preserve integrity of liveness analysis. Undef values are also 4710 // preserve integrity of liveness analysis. Undef values are also
4802 // turned into zeroes, since loOperand() and hiOperand() don't expect 4711 // turned into zeroes, since loOperand() and hiOperand() don't expect
4803 // Undef input. 4712 // Undef input.
4804 void TargetX8632::prelowerPhis() { 4713 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4805 // Pause constant blinding or pooling, blinding or pooling will be done later 4714 // Pause constant blinding or pooling, blinding or pooling will be done later
4806 // during phi lowering assignments 4715 // during phi lowering assignments
4807 BoolFlagSaver B(RandomizationPoolingPaused, true); 4716 BoolFlagSaver B(RandomizationPoolingPaused, true);
4808 4717
4809 CfgNode *Node = Context.getNode(); 4718 CfgNode *Node = Context.getNode();
4810 for (Inst &I : Node->getPhis()) { 4719 for (Inst &I : Node->getPhis()) {
4811 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4720 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4812 if (Phi->isDeleted()) 4721 if (Phi->isDeleted())
4813 continue; 4722 continue;
4814 Variable *Dest = Phi->getDest(); 4723 Variable *Dest = Phi->getDest();
(...skipping 10 matching lines...) Expand all
4825 PhiLo->addArgument(loOperand(Src), Label); 4734 PhiLo->addArgument(loOperand(Src), Label);
4826 PhiHi->addArgument(hiOperand(Src), Label); 4735 PhiHi->addArgument(hiOperand(Src), Label);
4827 } 4736 }
4828 Node->getPhis().push_back(PhiLo); 4737 Node->getPhis().push_back(PhiLo);
4829 Node->getPhis().push_back(PhiHi); 4738 Node->getPhis().push_back(PhiHi);
4830 Phi->setDeleted(); 4739 Phi->setDeleted();
4831 } 4740 }
4832 } 4741 }
4833 } 4742 }
4834 4743
4835 namespace {
4836
4837 bool isMemoryOperand(const Operand *Opnd) { 4744 bool isMemoryOperand(const Operand *Opnd) {
4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) 4745 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4839 return !Var->hasReg(); 4746 return !Var->hasReg();
4840 // We treat vector undef values the same as a memory operand, 4747 // We treat vector undef values the same as a memory operand,
4841 // because they do in fact need a register to materialize the vector 4748 // because they do in fact need a register to materialize the vector
4842 // of zeroes into. 4749 // of zeroes into.
4843 if (llvm::isa<ConstantUndef>(Opnd)) 4750 if (llvm::isa<ConstantUndef>(Opnd))
4844 return isScalarFloatingType(Opnd->getType()) || 4751 return isScalarFloatingType(Opnd->getType()) ||
4845 isVectorType(Opnd->getType()); 4752 isVectorType(Opnd->getType());
4846 if (llvm::isa<Constant>(Opnd)) 4753 if (llvm::isa<Constant>(Opnd))
4847 return isScalarFloatingType(Opnd->getType()); 4754 return isScalarFloatingType(Opnd->getType());
4848 return true; 4755 return true;
4849 } 4756 }
4850 4757
4851 } // end of anonymous namespace
4852
4853 // Lower the pre-ordered list of assignments into mov instructions. 4758 // Lower the pre-ordered list of assignments into mov instructions.
4854 // Also has to do some ad-hoc register allocation as necessary. 4759 // Also has to do some ad-hoc register allocation as necessary.
4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node, 4760 template <class Machine>
4856 const AssignList &Assignments) { 4761 void TargetX86Base<Machine>::lowerPhiAssignments(
4762 CfgNode *Node, const AssignList &Assignments) {
4857 // Check that this is a properly initialized shell of a node. 4763 // Check that this is a properly initialized shell of a node.
4858 assert(Node->getOutEdges().size() == 1); 4764 assert(Node->getOutEdges().size() == 1);
4859 assert(Node->getInsts().empty()); 4765 assert(Node->getInsts().empty());
4860 assert(Node->getPhis().empty()); 4766 assert(Node->getPhis().empty());
4861 CfgNode *Succ = Node->getOutEdges().front(); 4767 CfgNode *Succ = Node->getOutEdges().front();
4862 getContext().init(Node); 4768 getContext().init(Node);
4863 // Register set setup similar to regAlloc(). 4769 // Register set setup similar to regAlloc().
4864 RegSetMask RegInclude = RegSet_All; 4770 RegSetMask RegInclude = RegSet_All;
4865 RegSetMask RegExclude = RegSet_StackPointer; 4771 RegSetMask RegExclude = RegSet_StackPointer;
4866 if (hasFramePointer()) 4772 if (hasFramePointer())
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
4997 _br(Succ); 4903 _br(Succ);
4998 } 4904 }
4999 4905
5000 // There is no support for loading or emitting vector constants, so the 4906 // There is no support for loading or emitting vector constants, so the
5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4907 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
5002 // etc. are initialized with register operations. 4908 // etc. are initialized with register operations.
5003 // 4909 //
5004 // TODO(wala): Add limited support for vector constants so that 4910 // TODO(wala): Add limited support for vector constants so that
5005 // complex initialization in registers is unnecessary. 4911 // complex initialization in registers is unnecessary.
5006 4912
5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { 4913 template <class Machine>
4914 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
5008 Variable *Reg = makeReg(Ty, RegNum); 4915 Variable *Reg = makeReg(Ty, RegNum);
5009 // Insert a FakeDef, since otherwise the live range of Reg might 4916 // Insert a FakeDef, since otherwise the live range of Reg might
5010 // be overestimated. 4917 // be overestimated.
5011 Context.insert(InstFakeDef::create(Func, Reg)); 4918 Context.insert(InstFakeDef::create(Func, Reg));
5012 _pxor(Reg, Reg); 4919 _pxor(Reg, Reg);
5013 return Reg; 4920 return Reg;
5014 } 4921 }
5015 4922
5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { 4923 template <class Machine>
4924 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
4925 int32_t RegNum) {
5017 Variable *MinusOnes = makeReg(Ty, RegNum); 4926 Variable *MinusOnes = makeReg(Ty, RegNum);
5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated. 4927 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
5019 Context.insert(InstFakeDef::create(Func, MinusOnes)); 4928 Context.insert(InstFakeDef::create(Func, MinusOnes));
5020 _pcmpeq(MinusOnes, MinusOnes); 4929 _pcmpeq(MinusOnes, MinusOnes);
5021 return MinusOnes; 4930 return MinusOnes;
5022 } 4931 }
5023 4932
5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { 4933 template <class Machine>
4934 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum); 4935 Variable *Dest = makeVectorOfZeros(Ty, RegNum);
5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 4936 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
5027 _psub(Dest, MinusOne); 4937 _psub(Dest, MinusOne);
5028 return Dest; 4938 return Dest;
5029 } 4939 }
5030 4940
5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4941 template <class Machine>
4942 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
4943 int32_t RegNum) {
5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4944 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5033 Ty == IceType_v16i8); 4945 Ty == IceType_v16i8);
5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4946 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4947 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4948 SizeT Shift =
4949 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5037 _psll(Reg, Ctx->getConstantInt8(Shift)); 4950 _psll(Reg, Ctx->getConstantInt8(Shift));
5038 return Reg; 4951 return Reg;
5039 } else { 4952 } else {
5040 // SSE has no left shift operation for vectors of 8 bit integers. 4953 // SSE has no left shift operation for vectors of 8 bit integers.
5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4954 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 4955 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5043 Variable *Reg = makeReg(Ty, RegNum); 4956 Variable *Reg = makeReg(Ty, RegNum);
5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4957 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4958 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5046 return Reg; 4959 return Reg;
5047 } 4960 }
5048 } 4961 }
5049 4962
5050 // Construct a mask in a register that can be and'ed with a 4963 // Construct a mask in a register that can be and'ed with a
5051 // floating-point value to mask off its sign bit. The value will be 4964 // floating-point value to mask off its sign bit. The value will be
5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> 4965 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
5053 // for f64. Construct it as vector of ones logically right shifted 4966 // for f64. Construct it as vector of ones logically right shifted
5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent 4967 // one bit. TODO(stichnot): Fix the wala TODO above, to represent
5055 // vector constants in memory. 4968 // vector constants in memory.
5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { 4969 template <class Machine>
4970 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4971 int32_t RegNum) {
5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4972 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
5058 _psrl(Reg, Ctx->getConstantInt8(1)); 4973 _psrl(Reg, Ctx->getConstantInt8(1));
5059 return Reg; 4974 return Reg;
5060 } 4975 }
5061 4976
5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4977 template <class Machine>
5063 Variable *Slot, 4978 OperandX8632Mem *
5064 uint32_t Offset) { 4979 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4980 uint32_t Offset) {
5065 // Ensure that Loc is a stack slot. 4981 // Ensure that Loc is a stack slot.
5066 assert(Slot->getWeight().isZero()); 4982 assert(Slot->getWeight().isZero());
5067 assert(Slot->getRegNum() == Variable::NoRegister); 4983 assert(Slot->getRegNum() == Variable::NoRegister);
5068 // Compute the location of Loc in memory. 4984 // Compute the location of Loc in memory.
5069 // TODO(wala,stichnot): lea should not be required. The address of 4985 // TODO(wala,stichnot): lea should not be required. The address of
5070 // the stack slot is known at compile time (although not until after 4986 // the stack slot is known at compile time (although not until after
5071 // addProlog()). 4987 // addProlog()).
5072 const Type PointerType = IceType_i32; 4988 const Type PointerType = IceType_i32;
5073 Variable *Loc = makeReg(PointerType); 4989 Variable *Loc = makeReg(PointerType);
5074 _lea(Loc, Slot); 4990 _lea(Loc, Slot);
5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4991 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4992 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
5077 } 4993 }
5078 4994
5079 // Helper for legalize() to emit the right code to lower an operand to a 4995 // Helper for legalize() to emit the right code to lower an operand to a
5080 // register of the appropriate type. 4996 // register of the appropriate type.
5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4997 template <class Machine>
4998 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5082 Type Ty = Src->getType(); 4999 Type Ty = Src->getType();
5083 Variable *Reg = makeReg(Ty, RegNum); 5000 Variable *Reg = makeReg(Ty, RegNum);
5084 if (isVectorType(Ty)) { 5001 if (isVectorType(Ty)) {
5085 _movp(Reg, Src); 5002 _movp(Reg, Src);
5086 } else { 5003 } else {
5087 _mov(Reg, Src); 5004 _mov(Reg, Src);
5088 } 5005 }
5089 return Reg; 5006 return Reg;
5090 } 5007 }
5091 5008
5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, 5009 template <class Machine>
5093 int32_t RegNum) { 5010 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
5011 int32_t RegNum) {
5094 Type Ty = From->getType(); 5012 Type Ty = From->getType();
5095 // Assert that a physical register is allowed. To date, all calls 5013 // Assert that a physical register is allowed. To date, all calls
5096 // to legalize() allow a physical register. If a physical register 5014 // to legalize() allow a physical register. If a physical register
5097 // needs to be explicitly disallowed, then new code will need to be 5015 // needs to be explicitly disallowed, then new code will need to be
5098 // written to force a spill. 5016 // written to force a spill.
5099 assert(Allowed & Legal_Reg); 5017 assert(Allowed & Legal_Reg);
5100 // If we're asking for a specific physical register, make sure we're 5018 // If we're asking for a specific physical register, make sure we're
5101 // not allowing any other operand kinds. (This could be future 5019 // not allowing any other operand kinds. (This could be future
5102 // work, e.g. allow the shl shift amount to be either an immediate 5020 // work, e.g. allow the shl shift amount to be either an immediate
5103 // or in ecx.) 5021 // or in ecx.)
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5114 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
5197 From = copyToReg(From, RegNum); 5115 From = copyToReg(From, RegNum);
5198 } 5116 }
5199 return From; 5117 return From;
5200 } 5118 }
5201 llvm_unreachable("Unhandled operand kind in legalize()"); 5119 llvm_unreachable("Unhandled operand kind in legalize()");
5202 return From; 5120 return From;
5203 } 5121 }
5204 5122
5205 // Provide a trivial wrapper to legalize() for this common usage. 5123 // Provide a trivial wrapper to legalize() for this common usage.
5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { 5124 template <class Machine>
5125 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 5126 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
5208 } 5127 }
5209 5128
5210 // For the cmp instruction, if Src1 is an immediate, or known to be a 5129 // For the cmp instruction, if Src1 is an immediate, or known to be a
5211 // physical register, we can allow Src0 to be a memory operand. 5130 // physical register, we can allow Src0 to be a memory operand.
5212 // Otherwise, Src0 must be copied into a physical register. 5131 // Otherwise, Src0 must be copied into a physical register.
5213 // (Actually, either Src0 or Src1 can be chosen for the physical 5132 // (Actually, either Src0 or Src1 can be chosen for the physical
5214 // register, but unfortunately we have to commit to one or the other 5133 // register, but unfortunately we have to commit to one or the other
5215 // before register allocation.) 5134 // before register allocation.)
5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { 5135 template <class Machine>
5136 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
5137 Operand *Src1) {
5217 bool IsSrc1ImmOrReg = false; 5138 bool IsSrc1ImmOrReg = false;
5218 if (llvm::isa<Constant>(Src1)) { 5139 if (llvm::isa<Constant>(Src1)) {
5219 IsSrc1ImmOrReg = true; 5140 IsSrc1ImmOrReg = true;
5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5141 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5221 if (Var->hasReg()) 5142 if (Var->hasReg())
5222 IsSrc1ImmOrReg = true; 5143 IsSrc1ImmOrReg = true;
5223 } 5144 }
5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 5145 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5225 } 5146 }
5226 5147
5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty, 5148 template <class Machine>
5228 bool DoLegalize) { 5149 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd,
5150 Type Ty,
5151 bool DoLegalize) {
5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); 5152 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
5230 // It may be the case that address mode optimization already creates 5153 // It may be the case that address mode optimization already creates
5231 // an OperandX8632Mem, so in that case it wouldn't need another level 5154 // an OperandX8632Mem, so in that case it wouldn't need another level
5232 // of transformation. 5155 // of transformation.
5233 if (!Mem) { 5156 if (!Mem) {
5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd); 5157 Variable *Base = llvm::dyn_cast<Variable>(Opnd);
5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); 5158 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
5236 assert(Base || Offset); 5159 assert(Base || Offset);
5237 if (Offset) { 5160 if (Offset) {
5238 // During memory operand building, we do not blind or pool 5161 // During memory operand building, we do not blind or pool
(...skipping 11 matching lines...) Expand all
5250 llvm::isa<ConstantRelocatable>(Offset)); 5173 llvm::isa<ConstantRelocatable>(Offset));
5251 } 5174 }
5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 5175 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
5253 } 5176 }
5254 // Do legalization, which contains randomization/pooling 5177 // Do legalization, which contains randomization/pooling
5255 // or do randomization/pooling. 5178 // or do randomization/pooling.
5256 return llvm::cast<OperandX8632Mem>( 5179 return llvm::cast<OperandX8632Mem>(
5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 5180 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
5258 } 5181 }
5259 5182
5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { 5183 template <class Machine>
5184 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
5261 // There aren't any 64-bit integer registers for x86-32. 5185 // There aren't any 64-bit integer registers for x86-32.
5262 assert(Type != IceType_i64); 5186 assert(Type != IceType_i64);
5263 Variable *Reg = Func->makeVariable(Type); 5187 Variable *Reg = Func->makeVariable(Type);
5264 if (RegNum == Variable::NoRegister) 5188 if (RegNum == Variable::NoRegister)
5265 Reg->setWeightInfinite(); 5189 Reg->setWeightInfinite();
5266 else 5190 else
5267 Reg->setRegNum(RegNum); 5191 Reg->setRegNum(RegNum);
5268 return Reg; 5192 return Reg;
5269 } 5193 }
5270 5194
5271 void TargetX8632::postLower() { 5195 template <class Machine> void TargetX86Base<Machine>::postLower() {
5272 if (Ctx->getFlags().getOptLevel() == Opt_m1) 5196 if (Ctx->getFlags().getOptLevel() == Opt_m1)
5273 return; 5197 return;
5274 inferTwoAddress(); 5198 inferTwoAddress();
5275 } 5199 }
5276 5200
5277 void TargetX8632::makeRandomRegisterPermutation( 5201 template <class Machine>
5202 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5278 llvm::SmallVectorImpl<int32_t> &Permutation, 5203 llvm::SmallVectorImpl<int32_t> &Permutation,
5279 const llvm::SmallBitVector &ExcludeRegisters) const { 5204 const llvm::SmallBitVector &ExcludeRegisters) const {
5280 // TODO(stichnot): Declaring Permutation this way loses type/size 5205 // TODO(stichnot): Declaring Permutation this way loses type/size
5281 // information. Fix this in conjunction with the caller-side TODO. 5206 // information. Fix this in conjunction with the caller-side TODO.
5282 assert(Permutation.size() >= RegX8632::Reg_NUM); 5207 assert(Permutation.size() >= RegX8632::Reg_NUM);
5283 // Expected upper bound on the number of registers in a single 5208 // Expected upper bound on the number of registers in a single
5284 // equivalence class. For x86-32, this would comprise the 8 XMM 5209 // equivalence class. For x86-32, this would comprise the 8 XMM
5285 // registers. This is for performance, not correctness. 5210 // registers. This is for performance, not correctness.
5286 static const unsigned MaxEquivalenceClassSize = 8; 5211 static const unsigned MaxEquivalenceClassSize = 8;
5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; 5212 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
5334 if (!First) 5259 if (!First)
5335 Str << " "; 5260 Str << " ";
5336 First = false; 5261 First = false;
5337 Str << getRegName(Register, IceType_i32); 5262 Str << getRegName(Register, IceType_i32);
5338 } 5263 }
5339 Str << "}\n"; 5264 Str << "}\n";
5340 } 5265 }
5341 } 5266 }
5342 } 5267 }
5343 5268
5344 void TargetX8632::emit(const ConstantInteger32 *C) const { 5269 template <class Machine>
5270 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
5345 if (!ALLOW_DUMP) 5271 if (!ALLOW_DUMP)
5346 return; 5272 return;
5347 Ostream &Str = Ctx->getStrEmit(); 5273 Ostream &Str = Ctx->getStrEmit();
5348 Str << getConstantPrefix() << C->getValue(); 5274 Str << getConstantPrefix() << C->getValue();
5349 } 5275 }
5350 5276
5351 void TargetX8632::emit(const ConstantInteger64 *) const { 5277 template <class Machine>
5278 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 5279 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
5353 } 5280 }
5354 5281
5355 void TargetX8632::emit(const ConstantFloat *C) const { 5282 template <class Machine>
5283 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
5356 if (!ALLOW_DUMP) 5284 if (!ALLOW_DUMP)
5357 return; 5285 return;
5358 Ostream &Str = Ctx->getStrEmit(); 5286 Ostream &Str = Ctx->getStrEmit();
5359 C->emitPoolLabel(Str); 5287 C->emitPoolLabel(Str);
5360 } 5288 }
5361 5289
5362 void TargetX8632::emit(const ConstantDouble *C) const { 5290 template <class Machine>
5291 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
5363 if (!ALLOW_DUMP) 5292 if (!ALLOW_DUMP)
5364 return; 5293 return;
5365 Ostream &Str = Ctx->getStrEmit(); 5294 Ostream &Str = Ctx->getStrEmit();
5366 C->emitPoolLabel(Str); 5295 C->emitPoolLabel(Str);
5367 } 5296 }
5368 5297
5369 void TargetX8632::emit(const ConstantUndef *) const { 5298 template <class Machine>
5299 void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
5370 llvm::report_fatal_error("undef value encountered by emitter."); 5300 llvm::report_fatal_error("undef value encountered by emitter.");
5371 } 5301 }
5372 5302
5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
5374 : TargetDataLowering(Ctx) {}
5375
5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
5377 const IceString &SectionSuffix) {
5378 switch (Ctx->getFlags().getOutFileType()) {
5379 case FT_Elf: {
5380 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
5382 } break;
5383 case FT_Asm:
5384 case FT_Iasm: {
5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
5386 OstreamLocker L(Ctx);
5387 for (const VariableDeclaration *Var : Vars) {
5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
5389 emitGlobal(*Var, SectionSuffix);
5390 }
5391 }
5392 } break;
5393 }
5394 }
5395
5396 template <typename T> struct PoolTypeConverter {};
5397
5398 template <> struct PoolTypeConverter<float> {
5399 typedef uint32_t PrimitiveIntType;
5400 typedef ConstantFloat IceType;
5401 static const Type Ty = IceType_f32;
5402 static const char *TypeName;
5403 static const char *AsmTag;
5404 static const char *PrintfString;
5405 };
5406 const char *PoolTypeConverter<float>::TypeName = "float";
5407 const char *PoolTypeConverter<float>::AsmTag = ".long";
5408 const char *PoolTypeConverter<float>::PrintfString = "0x%x";
5409
5410 template <> struct PoolTypeConverter<double> {
5411 typedef uint64_t PrimitiveIntType;
5412 typedef ConstantDouble IceType;
5413 static const Type Ty = IceType_f64;
5414 static const char *TypeName;
5415 static const char *AsmTag;
5416 static const char *PrintfString;
5417 };
5418 const char *PoolTypeConverter<double>::TypeName = "double";
5419 const char *PoolTypeConverter<double>::AsmTag = ".quad";
5420 const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
5421
5422 // Add converter for int type constant pooling
5423 template <> struct PoolTypeConverter<uint32_t> {
5424 typedef uint32_t PrimitiveIntType;
5425 typedef ConstantInteger32 IceType;
5426 static const Type Ty = IceType_i32;
5427 static const char *TypeName;
5428 static const char *AsmTag;
5429 static const char *PrintfString;
5430 };
5431 const char *PoolTypeConverter<uint32_t>::TypeName = "i32";
5432 const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";
5433 const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";
5434
5435 // Add converter for int type constant pooling
5436 template <> struct PoolTypeConverter<uint16_t> {
5437 typedef uint32_t PrimitiveIntType;
5438 typedef ConstantInteger32 IceType;
5439 static const Type Ty = IceType_i16;
5440 static const char *TypeName;
5441 static const char *AsmTag;
5442 static const char *PrintfString;
5443 };
5444 const char *PoolTypeConverter<uint16_t>::TypeName = "i16";
5445 const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";
5446 const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";
5447
5448 // Add converter for int type constant pooling
5449 template <> struct PoolTypeConverter<uint8_t> {
5450 typedef uint32_t PrimitiveIntType;
5451 typedef ConstantInteger32 IceType;
5452 static const Type Ty = IceType_i8;
5453 static const char *TypeName;
5454 static const char *AsmTag;
5455 static const char *PrintfString;
5456 };
5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
5460
5461 template <typename T>
5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
5463 if (!ALLOW_DUMP)
5464 return;
5465 Ostream &Str = Ctx->getStrEmit();
5466 Type Ty = T::Ty;
5467 SizeT Align = typeAlignInBytes(Ty);
5468 ConstantList Pool = Ctx->getConstantPool(Ty);
5469
5470 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
5471 << "\n";
5472 Str << "\t.align\t" << Align << "\n";
5473 for (Constant *C : Pool) {
5474 if (!C->getShouldBePooled())
5475 continue;
5476 typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
5477 typename T::IceType::PrimType Value = Const->getValue();
5478 // Use memcpy() to copy bits from Value into RawValue in a way
5479 // that avoids breaking strict-aliasing rules.
5480 typename T::PrimitiveIntType RawValue;
5481 memcpy(&RawValue, &Value, sizeof(Value));
5482 char buf[30];
5483 int CharsPrinted =
5484 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
5485 assert(CharsPrinted >= 0 &&
5486 (size_t)CharsPrinted < llvm::array_lengthof(buf));
5487 (void)CharsPrinted; // avoid warnings if asserts are disabled
5488 Const->emitPoolLabel(Str);
5489 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
5490 << Value << "\n";
5491 }
5492 }
5493
5494 void TargetDataX8632::lowerConstants() {
5495 if (Ctx->getFlags().getDisableTranslation())
5496 return;
5497 // No need to emit constants from the int pool since (for x86) they
5498 // are embedded as immediates in the instructions, just emit float/double.
5499 switch (Ctx->getFlags().getOutFileType()) {
5500 case FT_Elf: {
5501 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5502
5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
5506
5507 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5509 } break;
5510 case FT_Asm:
5511 case FT_Iasm: {
5512 OstreamLocker L(Ctx);
5513
5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
5517
5518 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5519 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5520 } break;
5521 }
5522 }
5523
5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5525 : TargetHeaderLowering(Ctx) {}
5526
5527 // Randomize or pool an Immediate. 5303 // Randomize or pool an Immediate.
5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate, 5304 template <class Machine>
5529 int32_t RegNum) { 5305 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
5306 int32_t RegNum) {
5530 assert(llvm::isa<ConstantInteger32>(Immediate) || 5307 assert(llvm::isa<ConstantInteger32>(Immediate) ||
5531 llvm::isa<ConstantRelocatable>(Immediate)); 5308 llvm::isa<ConstantRelocatable>(Immediate));
5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5309 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5533 RandomizationPoolingPaused == true) { 5310 RandomizationPoolingPaused == true) {
5534 // Immediates randomization/pooling off or paused 5311 // Immediates randomization/pooling off or paused
5535 return Immediate; 5312 return Immediate;
5536 } 5313 }
5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { 5314 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
5538 Ctx->statsUpdateRPImms(); 5315 Ctx->statsUpdateRPImms();
5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5316 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); 5372 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
5596 _mov(Reg, MemOperand); 5373 _mov(Reg, MemOperand);
5597 return Reg; 5374 return Reg;
5598 } 5375 }
5599 assert("Unsupported -randomize-pool-immediates option" && false); 5376 assert("Unsupported -randomize-pool-immediates option" && false);
5600 } 5377 }
5601 // the constant Immediate is not eligible for blinding/pooling 5378 // the constant Immediate is not eligible for blinding/pooling
5602 return Immediate; 5379 return Immediate;
5603 } 5380 }
5604 5381
5382 template <class Machine>
5605 OperandX8632Mem * 5383 OperandX8632Mem *
5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, 5384 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
5607 int32_t RegNum) { 5385 int32_t RegNum) {
5608 assert(MemOperand); 5386 assert(MemOperand);
5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5387 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5610 RandomizationPoolingPaused == true) { 5388 RandomizationPoolingPaused == true) {
5611 // immediates randomization/pooling is turned off 5389 // immediates randomization/pooling is turned off
5612 return MemOperand; 5390 return MemOperand;
5613 } 5391 }
5614 5392
5615 // If this memory operand is already a randommized one, we do 5393 // If this memory operand is already a randommized one, we do
5616 // not randomize it again. 5394 // not randomize it again.
5617 if (MemOperand->getRandomized()) 5395 if (MemOperand->getRandomized())
5618 return MemOperand; 5396 return MemOperand;
5619 5397
5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { 5398 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
5621 if (C->shouldBeRandomizedOrPooled(Ctx)) { 5399 if (C->shouldBeRandomizedOrPooled(Ctx)) {
5622 // The offset of this mem operand should be blinded or pooled 5400 // The offset of this mem operand should be blinded or pooled
5623 Ctx->statsUpdateRPImms(); 5401 Ctx->statsUpdateRPImms();
5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5402 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5625 RPI_Randomize) { 5403 RPI_Randomize) {
5626 // blind the constant offset 5404 // blind the constant offset
5627 // FROM: 5405 // FROM:
5628 // offset[base, index, shift] 5406 // offset[base, index, shift]
5629 // TO: 5407 // TO:
5630 // insert: lea offset+cookie[base], RegTemp 5408 // insert: lea offset+cookie[base], RegTemp
5631 // => -cookie[RegTemp, index, shift] 5409 // => -cookie[RegTemp, index, shift]
5632 uint32_t Value = 5410 uint32_t Value = llvm::dyn_cast<ConstantInteger32>(
5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) 5411 MemOperand->getOffset())->getValue();
5634 ->getValue();
5635 uint32_t Cookie = Ctx->getRandomizationCookie(); 5412 uint32_t Cookie = Ctx->getRandomizationCookie();
5636 Constant *Mask1 = Ctx->getConstantInt( 5413 Constant *Mask1 = Ctx->getConstantInt(
5637 MemOperand->getOffset()->getType(), Cookie + Value); 5414 MemOperand->getOffset()->getType(), Cookie + Value);
5638 Constant *Mask2 = 5415 Constant *Mask2 =
5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5416 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
5640 5417
5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( 5418 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); 5419 Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
5643 // If we have already assigned a physical register, we must come from 5420 // If we have already assigned a physical register, we must come from
5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5421 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
5710 return NewMemOperand; 5487 return NewMemOperand;
5711 } 5488 }
5712 assert("Unsupported -randomize-pool-immediates option" && false); 5489 assert("Unsupported -randomize-pool-immediates option" && false);
5713 } 5490 }
5714 } 5491 }
5715 // the offset is not eligible for blinding or pooling, return the original 5492 // the offset is not eligible for blinding or pooling, return the original
5716 // mem operand 5493 // mem operand
5717 return MemOperand; 5494 return MemOperand;
5718 } 5495 }
5719 5496
5497 } // end of namespace X86Internal
5720 } // end of namespace Ice 5498 } // end of namespace Ice
OLDNEW
« src/IceInst.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698