Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering ----------===// |
|
Jim Stichnoth
2015/06/22 21:52:02
add the "-*- C++ -*-" stuff
John
2015/06/22 22:09:23
Done, but why? Also, this C++ thingy adds no real
Jim Stichnoth
2015/06/22 23:04:05
This is an emacs thing, so that it invokes c++-mod
John
2015/06/22 23:09:55
Use vim instead? :)
Seriously, though, it is puzz
| |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX86Base class, which |
| 11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. | 12 // high-level instruction. |
| 13 // | 13 // |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
|
Jim Stichnoth
2015/06/22 21:52:02
Should there be an include guard?
John
2015/06/22 22:09:23
It doesn't hurt, but Impl files should not be incl
Jim Stichnoth
2015/06/22 23:04:05
OK, I see.
| |
| 16 #include "llvm/Support/MathExtras.h" | 16 #include "llvm/Support/MathExtras.h" |
| 17 | 17 |
| 18 #include "IceCfg.h" | 18 #include "IceCfg.h" |
| 19 #include "IceCfgNode.h" | 19 #include "IceCfgNode.h" |
| 20 #include "IceClFlags.h" | 20 #include "IceClFlags.h" |
| 21 #include "IceDefs.h" | 21 #include "IceDefs.h" |
| 22 #include "IceELFObjectWriter.h" | 22 #include "IceELFObjectWriter.h" |
| 23 #include "IceGlobalInits.h" | 23 #include "IceGlobalInits.h" |
| 24 #include "IceInstX8632.h" | 24 #include "IceInstX8632.h" |
| 25 #include "IceLiveness.h" | 25 #include "IceLiveness.h" |
| 26 #include "IceOperand.h" | 26 #include "IceOperand.h" |
| 27 #include "IceRegistersX8632.h" | 27 #include "IceRegistersX8632.h" |
| 28 #include "IceTargetLoweringX8632.def" | 28 #include "IceTargetLoweringX8632.def" |
| 29 #include "IceTargetLoweringX8632.h" | 29 #include "IceTargetLoweringX8632.h" |
| 30 #include "IceUtils.h" | 30 #include "IceUtils.h" |
| 31 | 31 |
| 32 namespace Ice { | 32 namespace Ice { |
| 33 | 33 namespace X86Internal { |
| 34 namespace { | |
| 35 | |
| 36 // The following table summarizes the logic for lowering the fcmp | |
| 37 // instruction. There is one table entry for each of the 16 conditions. | |
| 38 // | |
| 39 // The first four columns describe the case when the operands are | |
| 40 // floating point scalar values. A comment in lowerFcmp() describes the | |
| 41 // lowering template. In the most general case, there is a compare | |
| 42 // followed by two conditional branches, because some fcmp conditions | |
| 43 // don't map to a single x86 conditional branch. However, in many cases | |
| 44 // it is possible to swap the operands in the comparison and have a | |
| 45 // single conditional branch. Since it's quite tedious to validate the | |
| 46 // table by hand, good execution tests are helpful. | |
| 47 // | |
| 48 // The last two columns describe the case when the operands are vectors | |
| 49 // of floating point values. For most fcmp conditions, there is a clear | |
| 50 // mapping to a single x86 cmpps instruction variant. Some fcmp | |
| 51 // conditions require special code to handle and these are marked in the | |
| 52 // table with a Cmpps_Invalid predicate. | |
| 53 const struct TableFcmp_ { | |
| 54 uint32_t Default; | |
| 55 bool SwapScalarOperands; | |
| 56 CondX86::BrCond C1, C2; | |
| 57 bool SwapVectorOperands; | |
| 58 CondX86::CmppsCond Predicate; | |
| 59 } TableFcmp[] = { | |
| 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | |
| 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ | |
| 62 , | |
| 63 FCMPX8632_TABLE | |
| 64 #undef X | |
| 65 }; | |
| 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | |
| 67 | |
| 68 // The following table summarizes the logic for lowering the icmp instruction | |
| 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | |
| 70 // x86 conditional branch instruction. | |
| 71 | |
| 72 const struct TableIcmp32_ { | |
| 73 CondX86::BrCond Mapping; | |
| 74 } TableIcmp32[] = { | |
| 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 76 { CondX86::C_32 } \ | |
| 77 , | |
| 78 ICMPX8632_TABLE | |
| 79 #undef X | |
| 80 }; | |
| 81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); | |
| 82 | |
| 83 // The following table summarizes the logic for lowering the icmp instruction | |
| 84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | |
| 85 // conditional branches are needed. For the other conditions, three separate | |
| 86 // conditional branches are needed. | |
| 87 const struct TableIcmp64_ { | |
| 88 CondX86::BrCond C1, C2, C3; | |
| 89 } TableIcmp64[] = { | |
| 90 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \ | |
| 92 , | |
| 93 ICMPX8632_TABLE | |
| 94 #undef X | |
| 95 }; | |
| 96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | |
| 97 | |
| 98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | |
| 99 size_t Index = static_cast<size_t>(Cond); | |
| 100 assert(Index < TableIcmp32Size); | |
| 101 return TableIcmp32[Index].Mapping; | |
| 102 } | |
| 103 | |
| 104 const struct TableTypeX8632Attributes_ { | |
| 105 Type InVectorElementType; | |
| 106 } TableTypeX8632Attributes[] = { | |
| 107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | |
| 108 { elementty } \ | |
| 109 , | |
| 110 ICETYPEX8632_TABLE | |
| 111 #undef X | |
| 112 }; | |
| 113 const size_t TableTypeX8632AttributesSize = | |
| 114 llvm::array_lengthof(TableTypeX8632Attributes); | |
| 115 | |
| 116 // Return the type which the elements of the vector have in the X86 | |
| 117 // representation of the vector. | |
| 118 Type getInVectorElementType(Type Ty) { | |
| 119 assert(isVectorType(Ty)); | |
| 120 size_t Index = static_cast<size_t>(Ty); | |
| 121 (void)Index; | |
| 122 assert(Index < TableTypeX8632AttributesSize); | |
| 123 return TableTypeX8632Attributes[Ty].InVectorElementType; | |
| 124 } | |
| 125 | |
| 126 // The maximum number of arguments to pass in XMM registers | |
| 127 const uint32_t X86_MAX_XMM_ARGS = 4; | |
| 128 // The number of bits in a byte | |
| 129 const uint32_t X86_CHAR_BIT = 8; | |
| 130 // Stack alignment | |
| 131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | |
| 132 // Size of the return address on the stack | |
| 133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | |
| 134 // The number of different NOP instructions | |
| 135 const uint32_t X86_NUM_NOP_VARIANTS = 5; | |
| 136 | |
| 137 // Value is in bytes. Return Value adjusted to the next highest multiple | |
| 138 // of the stack alignment. | |
| 139 uint32_t applyStackAlignment(uint32_t Value) { | |
| 140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | |
| 141 } | |
| 142 | |
| 143 // In some cases, there are x-macros tables for both high-level and | |
| 144 // low-level instructions/operands that use the same enum key value. | |
| 145 // The tables are kept separate to maintain a proper separation | |
| 146 // between abstraction layers. There is a risk that the tables could | |
| 147 // get out of sync if enum values are reordered or if entries are | |
| 148 // added or deleted. The following dummy namespaces use | |
| 149 // static_asserts to ensure everything is kept in sync. | |
| 150 | |
| 151 // Validate the enum values in FCMPX8632_TABLE. | |
| 152 namespace dummy1 { | |
| 153 // Define a temporary set of enum values based on low-level table | |
| 154 // entries. | |
| 155 enum _tmp_enum { | |
| 156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | |
| 157 FCMPX8632_TABLE | |
| 158 #undef X | |
| 159 _num | |
| 160 }; | |
| 161 // Define a set of constants based on high-level table entries. | |
| 162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | |
| 163 ICEINSTFCMP_TABLE | |
| 164 #undef X | |
| 165 // Define a set of constants based on low-level table entries, and | |
| 166 // ensure the table entry keys are consistent. | |
| 167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | |
| 168 static const int _table2_##val = _tmp_##val; \ | |
| 169 static_assert( \ | |
| 170 _table1_##val == _table2_##val, \ | |
| 171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 172 FCMPX8632_TABLE | |
| 173 #undef X | |
| 174 // Repeat the static asserts with respect to the high-level table | |
| 175 // entries in case the high-level table has extra entries. | |
| 176 #define X(tag, str) \ | |
| 177 static_assert( \ | |
| 178 _table1_##tag == _table2_##tag, \ | |
| 179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 180 ICEINSTFCMP_TABLE | |
| 181 #undef X | |
| 182 } // end of namespace dummy1 | |
| 183 | |
| 184 // Validate the enum values in ICMPX8632_TABLE. | |
| 185 namespace dummy2 { | |
| 186 // Define a temporary set of enum values based on low-level table | |
| 187 // entries. | |
| 188 enum _tmp_enum { | |
| 189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | |
| 190 ICMPX8632_TABLE | |
| 191 #undef X | |
| 192 _num | |
| 193 }; | |
| 194 // Define a set of constants based on high-level table entries. | |
| 195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | |
| 196 ICEINSTICMP_TABLE | |
| 197 #undef X | |
| 198 // Define a set of constants based on low-level table entries, and | |
| 199 // ensure the table entry keys are consistent. | |
| 200 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 201 static const int _table2_##val = _tmp_##val; \ | |
| 202 static_assert( \ | |
| 203 _table1_##val == _table2_##val, \ | |
| 204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 205 ICMPX8632_TABLE | |
| 206 #undef X | |
| 207 // Repeat the static asserts with respect to the high-level table | |
| 208 // entries in case the high-level table has extra entries. | |
| 209 #define X(tag, str) \ | |
| 210 static_assert( \ | |
| 211 _table1_##tag == _table2_##tag, \ | |
| 212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 213 ICEINSTICMP_TABLE | |
| 214 #undef X | |
| 215 } // end of namespace dummy2 | |
| 216 | |
| 217 // Validate the enum values in ICETYPEX8632_TABLE. | |
| 218 namespace dummy3 { | |
| 219 // Define a temporary set of enum values based on low-level table | |
| 220 // entries. | |
| 221 enum _tmp_enum { | |
| 222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, | |
| 223 ICETYPEX8632_TABLE | |
| 224 #undef X | |
| 225 _num | |
| 226 }; | |
| 227 // Define a set of constants based on high-level table entries. | |
| 228 #define X(tag, size, align, elts, elty, str) \ | |
| 229 static const int _table1_##tag = tag; | |
| 230 ICETYPE_TABLE | |
| 231 #undef X | |
| 232 // Define a set of constants based on low-level table entries, and | |
| 233 // ensure the table entry keys are consistent. | |
| 234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | |
| 235 static const int _table2_##tag = _tmp_##tag; \ | |
| 236 static_assert(_table1_##tag == _table2_##tag, \ | |
| 237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 238 ICETYPEX8632_TABLE | |
| 239 #undef X | |
| 240 // Repeat the static asserts with respect to the high-level table | |
| 241 // entries in case the high-level table has extra entries. | |
| 242 #define X(tag, size, align, elts, elty, str) \ | |
| 243 static_assert(_table1_##tag == _table2_##tag, \ | |
| 244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 245 ICETYPE_TABLE | |
| 246 #undef X | |
| 247 } // end of namespace dummy3 | |
| 248 | 34 |
// RAII helper that temporarily overrides a boolean flag (e.g. to pause
// RandomizationPoolingPause-style constant blinding or pooling during
// certain translation phases) and restores the saved value on scope exit.
class BoolFlagSaver {
public:
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

  // Remember the current value of F, then set F to NewValue.
  BoolFlagSaver(bool &F, bool NewValue) : SavedValue(F), TrackedFlag(F) {
    F = NewValue;
  }
  // Restore the flag to the value it had at construction time.
  ~BoolFlagSaver() { TrackedFlag = SavedValue; }

private:
  const bool SavedValue; // flag value before the override
  bool &TrackedFlag;     // the flag being managed
};
| 264 | 50 |
| 265 } // end of anonymous namespace | 51 template <class MachineTraits> class BoolFoldingEntry { |
| 52 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | |
| 266 | 53 |
| 267 BoolFoldingEntry::BoolFoldingEntry(Inst *I) | 54 public: |
| 268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {} | 55 BoolFoldingEntry() = default; |
| 56 explicit BoolFoldingEntry(Inst *I); | |
| 57 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | |
| 58 // Instr is the instruction producing the i1-type variable of interest. | |
| 59 Inst *Instr = nullptr; | |
| 60 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | |
| 61 bool IsComplex = false; | |
| 62 // IsLiveOut is initialized conservatively to true, and is set to false when | |
| 63 // we encounter an instruction that ends Var's live range. We disable the | |
| 64 // folding optimization when Var is live beyond this basic block. Note that | |
| 65 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | |
| 66 // always be true and the folding optimization will never be performed. | |
| 67 bool IsLiveOut = true; | |
| 68 // NumUses counts the number of times Var is used as a source operand in the | |
| 69 // basic block. If IsComplex is true and there is more than one use of Var, | |
| 70 // then the folding optimization is disabled for Var. | |
| 71 uint32_t NumUses = 0; | |
| 72 }; | |
| 269 | 73 |
| 270 BoolFolding::BoolFoldingProducerKind | 74 template <class MachineTraits> class BoolFolding { |
| 271 BoolFolding::getProducerKind(const Inst *Instr) { | 75 public: |
| 76 enum BoolFoldingProducerKind { | |
| 77 PK_None, | |
| 78 PK_Icmp32, | |
| 79 PK_Icmp64, | |
| 80 PK_Fcmp, | |
| 81 PK_Trunc | |
| 82 }; | |
| 83 | |
| 84 // Currently the actual enum values are not used (other than CK_None), but we | |
| 85 // go | |
| 86 // ahead and produce them anyway for symmetry with the | |
| 87 // BoolFoldingProducerKind. | |
| 88 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | |
| 89 | |
| 90 private: | |
| 91 BoolFolding(const BoolFolding &) = delete; | |
| 92 BoolFolding &operator=(const BoolFolding &) = delete; | |
| 93 | |
| 94 public: | |
| 95 BoolFolding() = default; | |
| 96 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | |
| 97 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | |
| 98 static bool hasComplexLowering(const Inst *Instr); | |
| 99 void init(CfgNode *Node); | |
| 100 const Inst *getProducerFor(const Operand *Opnd) const; | |
| 101 void dump(const Cfg *Func) const; | |
| 102 | |
| 103 private: | |
| 104 // Returns true if Producers contains a valid entry for the given VarNum. | |
| 105 bool containsValid(SizeT VarNum) const { | |
| 106 auto Element = Producers.find(VarNum); | |
| 107 return Element != Producers.end() && Element->second.Instr != nullptr; | |
| 108 } | |
| 109 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | |
| 110 // Producers maps Variable::Number to a BoolFoldingEntry. | |
| 111 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | |
| 112 }; | |
| 113 | |
| 114 template <class MachineTraits> | |
| 115 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | |
| 116 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | |
| 117 | |
| 118 template <class MachineTraits> | |
| 119 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | |
| 120 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | |
| 272 if (llvm::isa<InstIcmp>(Instr)) { | 121 if (llvm::isa<InstIcmp>(Instr)) { |
| 273 if (Instr->getSrc(0)->getType() != IceType_i64) | 122 if (Instr->getSrc(0)->getType() != IceType_i64) |
| 274 return PK_Icmp32; | 123 return PK_Icmp32; |
| 275 return PK_None; // TODO(stichnot): actually PK_Icmp64; | 124 return PK_None; // TODO(stichnot): actually PK_Icmp64; |
| 276 } | 125 } |
| 277 return PK_None; // TODO(stichnot): remove this | 126 return PK_None; // TODO(stichnot): remove this |
| 278 | 127 |
| 279 if (llvm::isa<InstFcmp>(Instr)) | 128 if (llvm::isa<InstFcmp>(Instr)) |
| 280 return PK_Fcmp; | 129 return PK_Fcmp; |
| 281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 130 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 282 switch (Cast->getCastKind()) { | 131 switch (Cast->getCastKind()) { |
| 283 default: | 132 default: |
| 284 return PK_None; | 133 return PK_None; |
| 285 case InstCast::Trunc: | 134 case InstCast::Trunc: |
| 286 return PK_Trunc; | 135 return PK_Trunc; |
| 287 } | 136 } |
| 288 } | 137 } |
| 289 return PK_None; | 138 return PK_None; |
| 290 } | 139 } |
| 291 | 140 |
| 292 BoolFolding::BoolFoldingConsumerKind | 141 template <class MachineTraits> |
| 293 BoolFolding::getConsumerKind(const Inst *Instr) { | 142 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind |
| 143 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { | |
| 294 if (llvm::isa<InstBr>(Instr)) | 144 if (llvm::isa<InstBr>(Instr)) |
| 295 return CK_Br; | 145 return CK_Br; |
| 296 if (llvm::isa<InstSelect>(Instr)) | 146 if (llvm::isa<InstSelect>(Instr)) |
| 297 return CK_Select; | 147 return CK_Select; |
| 298 return CK_None; // TODO(stichnot): remove this | 148 return CK_None; // TODO(stichnot): remove this |
| 299 | 149 |
| 300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 150 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 301 switch (Cast->getCastKind()) { | 151 switch (Cast->getCastKind()) { |
| 302 default: | 152 default: |
| 303 return CK_None; | 153 return CK_None; |
| 304 case InstCast::Sext: | 154 case InstCast::Sext: |
| 305 return CK_Sext; | 155 return CK_Sext; |
| 306 case InstCast::Zext: | 156 case InstCast::Zext: |
| 307 return CK_Zext; | 157 return CK_Zext; |
| 308 } | 158 } |
| 309 } | 159 } |
| 310 return CK_None; | 160 return CK_None; |
| 311 } | 161 } |
| 312 | 162 |
| 313 // Returns true if the producing instruction has a "complex" lowering | 163 // Returns true if the producing instruction has a "complex" lowering |
| 314 // sequence. This generally means that its lowering sequence requires | 164 // sequence. This generally means that its lowering sequence requires |
| 315 // more than one conditional branch, namely 64-bit integer compares | 165 // more than one conditional branch, namely 64-bit integer compares |
| 316 // and some floating-point compares. When this is true, and there is | 166 // and some floating-point compares. When this is true, and there is |
| 317 // more than one consumer, we prefer to disable the folding | 167 // more than one consumer, we prefer to disable the folding |
| 318 // optimization because it minimizes branches. | 168 // optimization because it minimizes branches. |
| 319 bool BoolFolding::hasComplexLowering(const Inst *Instr) { | 169 template <class MachineTraits> |
| 170 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | |
| 320 switch (getProducerKind(Instr)) { | 171 switch (getProducerKind(Instr)) { |
| 321 default: | 172 default: |
| 322 return false; | 173 return false; |
| 323 case PK_Icmp64: | 174 case PK_Icmp64: |
| 324 return true; | 175 return true; |
| 325 case PK_Fcmp: | 176 case PK_Fcmp: |
| 326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != | 177 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 327 CondX86::Br_None; | 178 .C2 != CondX86::Br_None; |
| 328 } | 179 } |
| 329 } | 180 } |
| 330 | 181 |
| 331 void BoolFolding::init(CfgNode *Node) { | 182 template <class MachineTraits> |
| 183 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | |
| 332 Producers.clear(); | 184 Producers.clear(); |
| 333 for (Inst &Instr : Node->getInsts()) { | 185 for (Inst &Instr : Node->getInsts()) { |
| 334 // Check whether Instr is a valid producer. | 186 // Check whether Instr is a valid producer. |
| 335 Variable *Var = Instr.getDest(); | 187 Variable *Var = Instr.getDest(); |
| 336 if (!Instr.isDeleted() // only consider non-deleted instructions | 188 if (!Instr.isDeleted() // only consider non-deleted instructions |
| 337 && Var // only instructions with an actual dest var | 189 && Var // only instructions with an actual dest var |
| 338 && Var->getType() == IceType_i1 // only bool-type dest vars | 190 && Var->getType() == IceType_i1 // only bool-type dest vars |
| 339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | 191 && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
| 340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); | 192 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); |
| 341 } | 193 } |
| 342 // Check each src variable against the map. | 194 // Check each src variable against the map. |
| 343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { | 195 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { |
| 344 Operand *Src = Instr.getSrc(I); | 196 Operand *Src = Instr.getSrc(I); |
| 345 SizeT NumVars = Src->getNumVars(); | 197 SizeT NumVars = Src->getNumVars(); |
| 346 for (SizeT J = 0; J < NumVars; ++J) { | 198 for (SizeT J = 0; J < NumVars; ++J) { |
| 347 const Variable *Var = Src->getVar(J); | 199 const Variable *Var = Src->getVar(J); |
| 348 SizeT VarNum = Var->getIndex(); | 200 SizeT VarNum = Var->getIndex(); |
| 349 if (containsValid(VarNum)) { | 201 if (containsValid(VarNum)) { |
| 350 if (I != 0 // All valid consumers use Var as the first source operand | 202 if (I != 0 // All valid consumers use Var as the first source operand |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 372 continue; | 224 continue; |
| 373 } | 225 } |
| 374 // Mark as "dead" rather than outright deleting. This is so that | 226 // Mark as "dead" rather than outright deleting. This is so that |
| 375 // other peephole style optimizations during or before lowering | 227 // other peephole style optimizations during or before lowering |
| 376 // have access to this instruction in undeleted form. See for | 228 // have access to this instruction in undeleted form. See for |
| 377 // example tryOptimizedCmpxchgCmpBr(). | 229 // example tryOptimizedCmpxchgCmpBr(). |
| 378 I.second.Instr->setDead(); | 230 I.second.Instr->setDead(); |
| 379 } | 231 } |
| 380 } | 232 } |
| 381 | 233 |
| 382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { | 234 template <class MachineTraits> |
| 235 const Inst * | |
| 236 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { | |
| 383 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | 237 auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
| 384 if (Var == nullptr) | 238 if (Var == nullptr) |
| 385 return nullptr; | 239 return nullptr; |
| 386 SizeT VarNum = Var->getIndex(); | 240 SizeT VarNum = Var->getIndex(); |
| 387 auto Element = Producers.find(VarNum); | 241 auto Element = Producers.find(VarNum); |
| 388 if (Element == Producers.end()) | 242 if (Element == Producers.end()) |
| 389 return nullptr; | 243 return nullptr; |
| 390 return Element->second.Instr; | 244 return Element->second.Instr; |
| 391 } | 245 } |
| 392 | 246 |
| 393 void BoolFolding::dump(const Cfg *Func) const { | 247 template <class MachineTraits> |
| 248 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const { | |
| 394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) | 249 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) |
| 395 return; | 250 return; |
| 396 OstreamLocker L(Func->getContext()); | 251 OstreamLocker L(Func->getContext()); |
| 397 Ostream &Str = Func->getContext()->getStrDump(); | 252 Ostream &Str = Func->getContext()->getStrDump(); |
| 398 for (auto &I : Producers) { | 253 for (auto &I : Producers) { |
| 399 if (I.second.Instr == nullptr) | 254 if (I.second.Instr == nullptr) |
| 400 continue; | 255 continue; |
| 401 Str << "Found foldable producer:\n "; | 256 Str << "Found foldable producer:\n "; |
| 402 I.second.Instr->dump(Func); | 257 I.second.Instr->dump(Func); |
| 403 Str << "\n"; | 258 Str << "\n"; |
| 404 } | 259 } |
| 405 } | 260 } |
| 406 | 261 |
| 407 void TargetX8632::initNodeForLowering(CfgNode *Node) { | 262 template <class Machine> |
| 263 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) { | |
| 408 FoldingInfo.init(Node); | 264 FoldingInfo.init(Node); |
| 409 FoldingInfo.dump(Func); | 265 FoldingInfo.dump(Func); |
| 410 } | 266 } |
| 411 | 267 |
| 412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) { | 268 template <class Machine> |
| 413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == | 269 TargetX86Base<Machine>::TargetX86Base(Cfg *Func) |
| 414 (TargetInstructionSet::X86InstructionSet_End - | 270 : Machine(Func) { |
| 415 TargetInstructionSet::X86InstructionSet_Begin), | 271 static_assert( |
| 416 "X86InstructionSet range different from TargetInstructionSet"); | 272 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == |
| 273 (TargetInstructionSet::X86InstructionSet_End - | |
| 274 TargetInstructionSet::X86InstructionSet_Begin), | |
| 275 "Traits::InstructionSet range different from TargetInstructionSet"); | |
| 417 if (Func->getContext()->getFlags().getTargetInstructionSet() != | 276 if (Func->getContext()->getFlags().getTargetInstructionSet() != |
| 418 TargetInstructionSet::BaseInstructionSet) { | 277 TargetInstructionSet::BaseInstructionSet) { |
| 419 InstructionSet = static_cast<X86InstructionSet>( | 278 InstructionSet = static_cast<typename Traits::InstructionSet>( |
| 420 (Func->getContext()->getFlags().getTargetInstructionSet() - | 279 (Func->getContext()->getFlags().getTargetInstructionSet() - |
| 421 TargetInstructionSet::X86InstructionSet_Begin) + | 280 TargetInstructionSet::X86InstructionSet_Begin) + |
| 422 X86InstructionSet::Begin); | 281 Traits::InstructionSet::Begin); |
| 423 } | 282 } |
| 424 // TODO: Don't initialize IntegerRegisters and friends every time. | 283 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 425 // Instead, initialize in some sort of static initializer for the | 284 // Instead, initialize in some sort of static initializer for the |
| 426 // class. | 285 // class. |
| 427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | 286 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); |
| 428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | 287 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); |
| 429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | 288 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); |
| 430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | 289 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); |
| 431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | 290 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); |
| 432 ScratchRegs.resize(RegX8632::Reg_NUM); | 291 ScratchRegs.resize(RegX8632::Reg_NUM); |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 449 TypeToRegisterSet[IceType_f64] = FloatRegisters; | 308 TypeToRegisterSet[IceType_f64] = FloatRegisters; |
| 450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | 309 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; |
| 451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | 310 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; |
| 452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | 311 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; |
| 453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | 312 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; |
| 454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | 313 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; |
| 455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | 314 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; |
| 456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 315 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
| 457 } | 316 } |
| 458 | 317 |
| 459 void TargetX8632::translateO2() { | 318 template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| 460 TimerMarker T(TimerStack::TT_O2, Func); | 319 TimerMarker T(TimerStack::TT_O2, Func); |
| 461 | 320 |
| 462 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 321 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
| 463 // Lower Phi instructions. | 322 // Lower Phi instructions. |
| 464 Func->placePhiLoads(); | 323 Func->placePhiLoads(); |
| 465 if (Func->hasError()) | 324 if (Func->hasError()) |
| 466 return; | 325 return; |
| 467 Func->placePhiStores(); | 326 Func->placePhiStores(); |
| 468 if (Func->hasError()) | 327 if (Func->hasError()) |
| 469 return; | 328 return; |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 561 // needed for searching for opportunities. | 420 // needed for searching for opportunities. |
| 562 Func->doBranchOpt(); | 421 Func->doBranchOpt(); |
| 563 Func->dump("After branch optimization"); | 422 Func->dump("After branch optimization"); |
| 564 | 423 |
| 565 // Nop insertion | 424 // Nop insertion |
| 566 if (Ctx->getFlags().shouldDoNopInsertion()) { | 425 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 567 Func->doNopInsertion(); | 426 Func->doNopInsertion(); |
| 568 } | 427 } |
| 569 } | 428 } |
| 570 | 429 |
| 571 void TargetX8632::translateOm1() { | 430 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
| 572 TimerMarker T(TimerStack::TT_Om1, Func); | 431 TimerMarker T(TimerStack::TT_Om1, Func); |
| 573 | 432 |
| 574 Func->placePhiLoads(); | 433 Func->placePhiLoads(); |
| 575 if (Func->hasError()) | 434 if (Func->hasError()) |
| 576 return; | 435 return; |
| 577 Func->placePhiStores(); | 436 Func->placePhiStores(); |
| 578 if (Func->hasError()) | 437 if (Func->hasError()) |
| 579 return; | 438 return; |
| 580 Func->deletePhis(); | 439 Func->deletePhis(); |
| 581 if (Func->hasError()) | 440 if (Func->hasError()) |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 598 if (Func->hasError()) | 457 if (Func->hasError()) |
| 599 return; | 458 return; |
| 600 Func->dump("After stack frame mapping"); | 459 Func->dump("After stack frame mapping"); |
| 601 | 460 |
| 602 // Nop insertion | 461 // Nop insertion |
| 603 if (Ctx->getFlags().shouldDoNopInsertion()) { | 462 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 604 Func->doNopInsertion(); | 463 Func->doNopInsertion(); |
| 605 } | 464 } |
| 606 } | 465 } |
| 607 | 466 |
| 608 namespace { | |
| 609 | |
| 610 bool canRMW(const InstArithmetic *Arith) { | 467 bool canRMW(const InstArithmetic *Arith) { |
| 611 Type Ty = Arith->getDest()->getType(); | 468 Type Ty = Arith->getDest()->getType(); |
| 612 // X86 vector instructions write to a register and have no RMW | 469 // X86 vector instructions write to a register and have no RMW |
| 613 // option. | 470 // option. |
| 614 if (isVectorType(Ty)) | 471 if (isVectorType(Ty)) |
| 615 return false; | 472 return false; |
| 616 bool isI64 = Ty == IceType_i64; | 473 bool isI64 = Ty == IceType_i64; |
| 617 | 474 |
| 618 switch (Arith->getOp()) { | 475 switch (Arith->getOp()) { |
| 619 // Not handled for lack of simple lowering: | 476 // Not handled for lack of simple lowering: |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 645 return MemA->getBase() == MemB->getBase() && | 502 return MemA->getBase() == MemB->getBase() && |
| 646 MemA->getOffset() == MemB->getOffset() && | 503 MemA->getOffset() == MemB->getOffset() && |
| 647 MemA->getIndex() == MemB->getIndex() && | 504 MemA->getIndex() == MemB->getIndex() && |
| 648 MemA->getShift() == MemB->getShift() && | 505 MemA->getShift() == MemB->getShift() && |
| 649 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | 506 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
| 650 } | 507 } |
| 651 } | 508 } |
| 652 return false; | 509 return false; |
| 653 } | 510 } |
| 654 | 511 |
| 655 } // end of anonymous namespace | 512 template <class Machine> void TargetX86Base<Machine>::findRMW() { |
| 656 | |
| 657 void TargetX8632::findRMW() { | |
| 658 Func->dump("Before RMW"); | 513 Func->dump("Before RMW"); |
| 659 OstreamLocker L(Func->getContext()); | 514 OstreamLocker L(Func->getContext()); |
| 660 Ostream &Str = Func->getContext()->getStrDump(); | 515 Ostream &Str = Func->getContext()->getStrDump(); |
| 661 for (CfgNode *Node : Func->getNodes()) { | 516 for (CfgNode *Node : Func->getNodes()) { |
| 662 // Walk through the instructions, considering each sequence of 3 | 517 // Walk through the instructions, considering each sequence of 3 |
| 663 // instructions, and look for the particular RMW pattern. Note that this | 518 // instructions, and look for the particular RMW pattern. Note that this |
| 664 // search can be "broken" (false negatives) if there are intervening deleted | 519 // search can be "broken" (false negatives) if there are intervening deleted |
| 665 // instructions, or intervening instructions that could be safely moved out | 520 // instructions, or intervening instructions that could be safely moved out |
| 666 // of the way to reveal an RMW pattern. | 521 // of the way to reveal an RMW pattern. |
| 667 auto E = Node->getInsts().end(); | 522 auto E = Node->getInsts().end(); |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( | 594 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( |
| 740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | 595 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); |
| 741 Node->getInsts().insert(I3, RMW); | 596 Node->getInsts().insert(I3, RMW); |
| 742 } | 597 } |
| 743 } | 598 } |
| 744 } | 599 } |
| 745 } | 600 } |
| 746 } | 601 } |
| 747 } | 602 } |
| 748 | 603 |
| 749 namespace { | |
| 750 | |
| 751 // Converts a ConstantInteger32 operand into its constant value, or | 604 // Converts a ConstantInteger32 operand into its constant value, or |
| 752 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 605 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 753 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 606 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 607 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 755 return Integer->getValue(); | 608 return Integer->getValue(); |
| 756 return Intrinsics::MemoryOrderInvalid; | 609 return Intrinsics::MemoryOrderInvalid; |
| 757 } | 610 } |
| 758 | 611 |
| 759 // Determines whether the dest of a Load instruction can be folded | 612 // Determines whether the dest of a Load instruction can be folded |
| 760 // into one of the src operands of a 2-operand instruction. This is | 613 // into one of the src operands of a 2-operand instruction. This is |
| 761 // true as long as the load dest matches exactly one of the binary | 614 // true as long as the load dest matches exactly one of the binary |
| 762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 615 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| 763 // the answer is true. | 616 // the answer is true. |
| 764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 617 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 765 Operand *&Src0, Operand *&Src1) { | 618 Operand *&Src0, Operand *&Src1) { |
| 766 if (Src0 == LoadDest && Src1 != LoadDest) { | 619 if (Src0 == LoadDest && Src1 != LoadDest) { |
| 767 Src0 = LoadSrc; | 620 Src0 = LoadSrc; |
| 768 return true; | 621 return true; |
| 769 } | 622 } |
| 770 if (Src0 != LoadDest && Src1 == LoadDest) { | 623 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 771 Src1 = LoadSrc; | 624 Src1 = LoadSrc; |
| 772 return true; | 625 return true; |
| 773 } | 626 } |
| 774 return false; | 627 return false; |
| 775 } | 628 } |
| 776 | 629 |
| 777 } // end of anonymous namespace | 630 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| 778 | |
| 779 void TargetX8632::doLoadOpt() { | |
| 780 for (CfgNode *Node : Func->getNodes()) { | 631 for (CfgNode *Node : Func->getNodes()) { |
| 781 Context.init(Node); | 632 Context.init(Node); |
| 782 while (!Context.atEnd()) { | 633 while (!Context.atEnd()) { |
| 783 Variable *LoadDest = nullptr; | 634 Variable *LoadDest = nullptr; |
| 784 Operand *LoadSrc = nullptr; | 635 Operand *LoadSrc = nullptr; |
| 785 Inst *CurInst = Context.getCur(); | 636 Inst *CurInst = Context.getCur(); |
| 786 Inst *Next = Context.getNextInst(); | 637 Inst *Next = Context.getNextInst(); |
| 787 // Determine whether the current instruction is a Load | 638 // Determine whether the current instruction is a Load |
| 788 // instruction or equivalent. | 639 // instruction or equivalent. |
| 789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 640 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 859 NewInst->spliceLivenessInfo(Next, CurInst); | 710 NewInst->spliceLivenessInfo(Next, CurInst); |
| 860 } | 711 } |
| 861 } | 712 } |
| 862 Context.advanceCur(); | 713 Context.advanceCur(); |
| 863 Context.advanceNext(); | 714 Context.advanceNext(); |
| 864 } | 715 } |
| 865 } | 716 } |
| 866 Func->dump("After load optimization"); | 717 Func->dump("After load optimization"); |
| 867 } | 718 } |
| 868 | 719 |
| 869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 720 template <class Machine> |
| 721 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | |
| 870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 722 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
| 871 return Br->optimizeBranch(NextNode); | 723 return Br->optimizeBranch(NextNode); |
| 872 } | 724 } |
| 873 return false; | 725 return false; |
| 874 } | 726 } |
| 875 | 727 |
| 876 IceString TargetX8632::RegNames[] = { | 728 template <class Machine> |
| 729 IceString TargetX86Base<Machine>::RegNames[] = { | |
| 877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 730 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 878 frameptr, isI8, isInt, isFP) \ | 731 frameptr, isI8, isInt, isFP) \ |
| 879 name, | 732 name, |
| 880 REGX8632_TABLE | 733 REGX8632_TABLE |
| 881 #undef X | 734 #undef X |
| 882 }; | 735 }; |
| 883 | 736 |
| 884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) { | 737 template <class Machine> |
| 738 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | |
| 885 if (Ty == IceType_void) | 739 if (Ty == IceType_void) |
| 886 Ty = IceType_i32; | 740 Ty = IceType_i32; |
| 887 if (PhysicalRegisters[Ty].empty()) | 741 if (PhysicalRegisters[Ty].empty()) |
| 888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); | 742 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); |
| 889 assert(RegNum < PhysicalRegisters[Ty].size()); | 743 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 890 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 744 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 891 if (Reg == nullptr) { | 745 if (Reg == nullptr) { |
| 892 Reg = Func->makeVariable(Ty); | 746 Reg = Func->makeVariable(Ty); |
| 893 Reg->setRegNum(RegNum); | 747 Reg->setRegNum(RegNum); |
| 894 PhysicalRegisters[Ty][RegNum] = Reg; | 748 PhysicalRegisters[Ty][RegNum] = Reg; |
| 895 // Specially mark esp as an "argument" so that it is considered | 749 // Specially mark esp as an "argument" so that it is considered |
| 896 // live upon function entry. | 750 // live upon function entry. |
| 897 if (RegNum == RegX8632::Reg_esp) { | 751 if (RegNum == RegX8632::Reg_esp) { |
| 898 Func->addImplicitArg(Reg); | 752 Func->addImplicitArg(Reg); |
| 899 Reg->setIgnoreLiveness(); | 753 Reg->setIgnoreLiveness(); |
| 900 } | 754 } |
| 901 } | 755 } |
| 902 return Reg; | 756 return Reg; |
| 903 } | 757 } |
| 904 | 758 |
| 905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { | 759 template <class Machine> |
| 760 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | |
| 906 assert(RegNum < RegX8632::Reg_NUM); | 761 assert(RegNum < RegX8632::Reg_NUM); |
| 907 static IceString RegNames8[] = { | 762 static IceString RegNames8[] = { |
| 908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 763 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 909 frameptr, isI8, isInt, isFP) \ | 764 frameptr, isI8, isInt, isFP) \ |
| 910 name8, | 765 name8, |
| 911 REGX8632_TABLE | 766 REGX8632_TABLE |
| 912 #undef X | 767 #undef X |
| 913 }; | 768 }; |
| 914 static IceString RegNames16[] = { | 769 static IceString RegNames16[] = { |
| 915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 770 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 916 frameptr, isI8, isInt, isFP) \ | 771 frameptr, isI8, isInt, isFP) \ |
| 917 name16, | 772 name16, |
| 918 REGX8632_TABLE | 773 REGX8632_TABLE |
| 919 #undef X | 774 #undef X |
| 920 }; | 775 }; |
| 921 switch (Ty) { | 776 switch (Ty) { |
| 922 case IceType_i1: | 777 case IceType_i1: |
| 923 case IceType_i8: | 778 case IceType_i8: |
| 924 return RegNames8[RegNum]; | 779 return RegNames8[RegNum]; |
| 925 case IceType_i16: | 780 case IceType_i16: |
| 926 return RegNames16[RegNum]; | 781 return RegNames16[RegNum]; |
| 927 default: | 782 default: |
| 928 return RegNames[RegNum]; | 783 return RegNames[RegNum]; |
| 929 } | 784 } |
| 930 } | 785 } |
| 931 | 786 |
| 932 void TargetX8632::emitVariable(const Variable *Var) const { | 787 template <class Machine> |
| 788 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | |
| 933 Ostream &Str = Ctx->getStrEmit(); | 789 Ostream &Str = Ctx->getStrEmit(); |
| 934 if (Var->hasReg()) { | 790 if (Var->hasReg()) { |
| 935 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 791 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
| 936 return; | 792 return; |
| 937 } | 793 } |
| 938 if (Var->getWeight().isInf()) { | 794 if (Var->getWeight().isInf()) { |
| 939 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 795 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 940 } | 796 } |
| 941 int32_t Offset = Var->getStackOffset(); | 797 int32_t Offset = Var->getStackOffset(); |
| 942 if (!hasFramePointer()) | 798 if (!hasFramePointer()) |
| 943 Offset += getStackAdjustment(); | 799 Offset += getStackAdjustment(); |
| 944 if (Offset) | 800 if (Offset) |
| 945 Str << Offset; | 801 Str << Offset; |
| 946 const Type FrameSPTy = IceType_i32; | 802 const Type FrameSPTy = IceType_i32; |
| 947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; | 803 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; |
| 948 } | 804 } |
| 949 | 805 |
| 950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const { | 806 template <class Machine> |
| 807 X8632::Address | |
| 808 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | |
| 951 if (Var->hasReg()) | 809 if (Var->hasReg()) |
| 952 llvm_unreachable("Stack Variable has a register assigned"); | 810 llvm_unreachable("Stack Variable has a register assigned"); |
| 953 if (Var->getWeight().isInf()) { | 811 if (Var->getWeight().isInf()) { |
| 954 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 812 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 955 } | 813 } |
| 956 int32_t Offset = Var->getStackOffset(); | 814 int32_t Offset = Var->getStackOffset(); |
| 957 if (!hasFramePointer()) | 815 if (!hasFramePointer()) |
| 958 Offset += getStackAdjustment(); | 816 Offset += getStackAdjustment(); |
| 959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); | 817 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); |
| 960 } | 818 } |
| 961 | 819 |
| 962 void TargetX8632::lowerArguments() { | 820 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { |
| 963 VarList &Args = Func->getArgs(); | 821 VarList &Args = Func->getArgs(); |
| 964 // The first four arguments of vector type, regardless of their | 822 // The first four arguments of vector type, regardless of their |
| 965 // position relative to the other arguments in the argument list, are | 823 // position relative to the other arguments in the argument list, are |
| 966 // passed in registers xmm0 - xmm3. | 824 // passed in registers xmm0 - xmm3. |
| 967 unsigned NumXmmArgs = 0; | 825 unsigned NumXmmArgs = 0; |
| 968 | 826 |
| 969 Context.init(Func->getEntryNode()); | 827 Context.init(Func->getEntryNode()); |
| 970 Context.setInsertPoint(Context.getCur()); | 828 Context.setInsertPoint(Context.getCur()); |
| 971 | 829 |
| 972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; | 830 for (SizeT I = 0, E = Args.size(); |
| 973 ++I) { | 831 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { |
| 974 Variable *Arg = Args[I]; | 832 Variable *Arg = Args[I]; |
| 975 Type Ty = Arg->getType(); | 833 Type Ty = Arg->getType(); |
| 976 if (!isVectorType(Ty)) | 834 if (!isVectorType(Ty)) |
| 977 continue; | 835 continue; |
| 978 // Replace Arg in the argument list with the home register. Then | 836 // Replace Arg in the argument list with the home register. Then |
| 979 // generate an instruction in the prolog to copy the home register | 837 // generate an instruction in the prolog to copy the home register |
| 980 // to the assigned location of Arg. | 838 // to the assigned location of Arg. |
| 981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; | 839 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; |
| 982 ++NumXmmArgs; | 840 ++NumXmmArgs; |
| 983 Variable *RegisterArg = Func->makeVariable(Ty); | 841 Variable *RegisterArg = Func->makeVariable(Ty); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 994 | 852 |
| 995 // Helper function for addProlog(). | 853 // Helper function for addProlog(). |
| 996 // | 854 // |
| 997 // This assumes Arg is an argument passed on the stack. This sets the | 855 // This assumes Arg is an argument passed on the stack. This sets the |
| 998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 856 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 999 // width. For an I64 arg that has been split into Lo and Hi components, | 857 // width. For an I64 arg that has been split into Lo and Hi components, |
| 1000 // it calls itself recursively on the components, taking care to handle | 858 // it calls itself recursively on the components, taking care to handle |
| 1001 // Lo first because of the little-endian architecture. Lastly, this | 859 // Lo first because of the little-endian architecture. Lastly, this |
| 1002 // function generates an instruction to copy Arg into its assigned | 860 // function generates an instruction to copy Arg into its assigned |
| 1003 // register if applicable. | 861 // register if applicable. |
| 1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 862 template <class Machine> |
| 1005 size_t BasicFrameOffset, | 863 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 1006 size_t &InArgsSizeBytes) { | 864 Variable *FramePtr, |
| 865 size_t BasicFrameOffset, | |
| 866 size_t &InArgsSizeBytes) { | |
| 1007 Variable *Lo = Arg->getLo(); | 867 Variable *Lo = Arg->getLo(); |
| 1008 Variable *Hi = Arg->getHi(); | 868 Variable *Hi = Arg->getHi(); |
| 1009 Type Ty = Arg->getType(); | 869 Type Ty = Arg->getType(); |
| 1010 if (Lo && Hi && Ty == IceType_i64) { | 870 if (Lo && Hi && Ty == IceType_i64) { |
| 1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 871 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 872 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 873 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 874 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1015 return; | 875 return; |
| 1016 } | 876 } |
| 1017 if (isVectorType(Ty)) { | 877 if (isVectorType(Ty)) { |
| 1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); | 878 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 1019 } | 879 } |
| 1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 880 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 881 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 1022 if (Arg->hasReg()) { | 882 if (Arg->hasReg()) { |
| 1023 assert(Ty != IceType_i64); | 883 assert(Ty != IceType_i64); |
| 1024 OperandX8632Mem *Mem = OperandX8632Mem::create( | 884 OperandX8632Mem *Mem = OperandX8632Mem::create( |
| 1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 885 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
| 1026 if (isVectorType(Arg->getType())) { | 886 if (isVectorType(Arg->getType())) { |
| 1027 _movp(Arg, Mem); | 887 _movp(Arg, Mem); |
| 1028 } else { | 888 } else { |
| 1029 _mov(Arg, Mem); | 889 _mov(Arg, Mem); |
| 1030 } | 890 } |
| 1031 // This argument-copying instruction uses an explicit | 891 // This argument-copying instruction uses an explicit |
| 1032 // OperandX8632Mem operand instead of a Variable, so its | 892 // OperandX8632Mem operand instead of a Variable, so its |
| 1033 // fill-from-stack operation has to be tracked separately for | 893 // fill-from-stack operation has to be tracked separately for |
| 1034 // statistics. | 894 // statistics. |
| 1035 Ctx->statsUpdateFills(); | 895 Ctx->statsUpdateFills(); |
| 1036 } | 896 } |
| 1037 } | 897 } |
| 1038 | 898 |
| 1039 Type TargetX8632::stackSlotType() { return IceType_i32; } | 899 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 900 return IceType_i32; | |
| 901 } | |
| 1040 | 902 |
| 1041 void TargetX8632::addProlog(CfgNode *Node) { | 903 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { |
| 1042 // Stack frame layout: | 904 // Stack frame layout: |
| 1043 // | 905 // |
| 1044 // +------------------------+ | 906 // +------------------------+ |
| 1045 // | 1. return address | | 907 // | 1. return address | |
| 1046 // +------------------------+ | 908 // +------------------------+ |
| 1047 // | 2. preserved registers | | 909 // | 2. preserved registers | |
| 1048 // +------------------------+ | 910 // +------------------------+ |
| 1049 // | 3. padding | | 911 // | 3. padding | |
| 1050 // +------------------------+ | 912 // +------------------------+ |
| 1051 // | 4. global spill area | | 913 // | 4. global spill area | |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1140 _mov(ebp, esp); | 1002 _mov(ebp, esp); |
| 1141 // Keep ebp live for late-stage liveness analysis | 1003 // Keep ebp live for late-stage liveness analysis |
| 1142 // (e.g. asm-verbose mode). | 1004 // (e.g. asm-verbose mode). |
| 1143 Context.insert(InstFakeUse::create(Func, ebp)); | 1005 Context.insert(InstFakeUse::create(Func, ebp)); |
| 1144 } | 1006 } |
| 1145 | 1007 |
| 1146 // Align the variables area. SpillAreaPaddingBytes is the size of | 1008 // Align the variables area. SpillAreaPaddingBytes is the size of |
| 1147 // the region after the preserved registers and before the spill areas. | 1009 // the region after the preserved registers and before the spill areas. |
| 1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 1010 // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| 1149 // and locals area if they are separate. | 1011 // and locals area if they are separate. |
| 1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); | 1012 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| 1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 1013 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 1152 uint32_t SpillAreaPaddingBytes = 0; | 1014 uint32_t SpillAreaPaddingBytes = 0; |
| 1153 uint32_t LocalsSlotsPaddingBytes = 0; | 1015 uint32_t LocalsSlotsPaddingBytes = 0; |
| 1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, | 1016 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| 1155 SpillAreaAlignmentBytes, GlobalsSize, | 1017 SpillAreaAlignmentBytes, GlobalsSize, |
| 1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, | 1018 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| 1157 &LocalsSlotsPaddingBytes); | 1019 &LocalsSlotsPaddingBytes); |
| 1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 1020 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 1159 uint32_t GlobalsAndSubsequentPaddingSize = | 1021 uint32_t GlobalsAndSubsequentPaddingSize = |
| 1160 GlobalsSize + LocalsSlotsPaddingBytes; | 1022 GlobalsSize + LocalsSlotsPaddingBytes; |
| 1161 | 1023 |
| 1162 // Align esp if necessary. | 1024 // Align esp if necessary. |
| 1163 if (NeedsStackAlignment) { | 1025 if (NeedsStackAlignment) { |
| 1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 1026 uint32_t StackOffset = |
| 1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 1027 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 1028 uint32_t StackSize = | |
| 1029 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | |
| 1166 SpillAreaSizeBytes = StackSize - StackOffset; | 1030 SpillAreaSizeBytes = StackSize - StackOffset; |
| 1167 } | 1031 } |
| 1168 | 1032 |
| 1169 // Generate "sub esp, SpillAreaSizeBytes" | 1033 // Generate "sub esp, SpillAreaSizeBytes" |
| 1170 if (SpillAreaSizeBytes) | 1034 if (SpillAreaSizeBytes) |
| 1171 _sub(getPhysicalRegister(RegX8632::Reg_esp), | 1035 _sub(getPhysicalRegister(RegX8632::Reg_esp), |
| 1172 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 1036 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1037 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 1174 | 1038 |
| 1175 resetStackAdjustment(); | 1039 resetStackAdjustment(); |
| 1176 | 1040 |
| 1177 // Fill in stack offsets for stack args, and copy args into registers | 1041 // Fill in stack offsets for stack args, and copy args into registers |
| 1178 // for those that were register-allocated. Args are pushed right to | 1042 // for those that were register-allocated. Args are pushed right to |
| 1179 // left, so Arg[0] is closest to the stack/frame pointer. | 1043 // left, so Arg[0] is closest to the stack/frame pointer. |
| 1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1044 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 1045 size_t BasicFrameOffset = |
| 1046 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | |
| 1182 if (!IsEbpBasedFrame) | 1047 if (!IsEbpBasedFrame) |
| 1183 BasicFrameOffset += SpillAreaSizeBytes; | 1048 BasicFrameOffset += SpillAreaSizeBytes; |
| 1184 | 1049 |
| 1185 const VarList &Args = Func->getArgs(); | 1050 const VarList &Args = Func->getArgs(); |
| 1186 size_t InArgsSizeBytes = 0; | 1051 size_t InArgsSizeBytes = 0; |
| 1187 unsigned NumXmmArgs = 0; | 1052 unsigned NumXmmArgs = 0; |
| 1188 for (Variable *Arg : Args) { | 1053 for (Variable *Arg : Args) { |
| 1189 // Skip arguments passed in registers. | 1054 // Skip arguments passed in registers. |
| 1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 1055 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| 1191 ++NumXmmArgs; | 1056 ++NumXmmArgs; |
| 1192 continue; | 1057 continue; |
| 1193 } | 1058 } |
| 1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 1059 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1195 } | 1060 } |
| 1196 | 1061 |
| 1197 // Fill in stack offsets for locals. | 1062 // Fill in stack offsets for locals. |
| 1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | 1063 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| 1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | 1064 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| 1200 IsEbpBasedFrame); | 1065 IsEbpBasedFrame); |
| 1201 // Assign stack offsets to variables that have been linked to spilled | 1066 // Assign stack offsets to variables that have been linked to spilled |
| 1202 // variables. | 1067 // variables. |
| 1203 for (Variable *Var : VariablesLinkedToSpillSlots) { | 1068 for (Variable *Var : VariablesLinkedToSpillSlots) { |
| 1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); | 1069 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); |
| 1205 Var->setStackOffset(Linked->getStackOffset()); | 1070 Var->setStackOffset(Linked->getStackOffset()); |
| 1206 } | 1071 } |
| 1207 this->HasComputedFrame = true; | 1072 this->HasComputedFrame = true; |
| 1208 | 1073 |
| 1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { | 1074 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { |
| 1210 OstreamLocker L(Func->getContext()); | 1075 OstreamLocker L(Func->getContext()); |
| 1211 Ostream &Str = Func->getContext()->getStrDump(); | 1076 Ostream &Str = Func->getContext()->getStrDump(); |
| 1212 | 1077 |
| 1213 Str << "Stack layout:\n"; | 1078 Str << "Stack layout:\n"; |
| 1214 uint32_t EspAdjustmentPaddingSize = | 1079 uint32_t EspAdjustmentPaddingSize = |
| 1215 SpillAreaSizeBytes - LocalsSpillAreaSize - | 1080 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 1081 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| 1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 1082 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" | 1083 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 1084 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 1085 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 1221 << " globals spill area = " << GlobalsSize << " bytes\n" | 1086 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 1222 << " globals-locals spill areas intermediate padding = " | 1087 << " globals-locals spill areas intermediate padding = " |
| 1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 1088 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 1089 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 1225 << " esp alignment padding = " << EspAdjustmentPaddingSize | 1090 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 1226 << " bytes\n"; | 1091 << " bytes\n"; |
| 1227 | 1092 |
| 1228 Str << "Stack details:\n" | 1093 Str << "Stack details:\n" |
| 1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | 1094 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 1095 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 1096 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 1232 << " bytes\n" | 1097 << " bytes\n" |
| 1233 << " is ebp based = " << IsEbpBasedFrame << "\n"; | 1098 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 1234 } | 1099 } |
| 1235 } | 1100 } |
| 1236 | 1101 |
| 1237 void TargetX8632::addEpilog(CfgNode *Node) { | 1102 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) { |
| 1238 InstList &Insts = Node->getInsts(); | 1103 InstList &Insts = Node->getInsts(); |
| 1239 InstList::reverse_iterator RI, E; | 1104 InstList::reverse_iterator RI, E; |
| 1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 1105 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 1241 if (llvm::isa<InstX8632Ret>(*RI)) | 1106 if (llvm::isa<InstX8632Ret>(*RI)) |
| 1242 break; | 1107 break; |
| 1243 } | 1108 } |
| 1244 if (RI == E) | 1109 if (RI == E) |
| 1245 return; | 1110 return; |
| 1246 | 1111 |
| 1247 // Convert the reverse_iterator position into its corresponding | 1112 // Convert the reverse_iterator position into its corresponding |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1280 | 1145 |
| 1281 if (!Ctx->getFlags().getUseSandboxing()) | 1146 if (!Ctx->getFlags().getUseSandboxing()) |
| 1282 return; | 1147 return; |
| 1283 // Change the original ret instruction into a sandboxed return sequence. | 1148 // Change the original ret instruction into a sandboxed return sequence. |
| 1284 // t:ecx = pop | 1149 // t:ecx = pop |
| 1285 // bundle_lock | 1150 // bundle_lock |
| 1286 // and t, ~31 | 1151 // and t, ~31 |
| 1287 // jmp *t | 1152 // jmp *t |
| 1288 // bundle_unlock | 1153 // bundle_unlock |
| 1289 // FakeUse <original_ret_operand> | 1154 // FakeUse <original_ret_operand> |
| 1290 const SizeT BundleSize = 1 | 1155 const SizeT BundleSize = |
| 1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 1156 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 1157 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| 1293 _pop(T_ecx); | 1158 _pop(T_ecx); |
| 1294 _bundle_lock(); | 1159 _bundle_lock(); |
| 1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | 1160 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 1296 _jmp(T_ecx); | 1161 _jmp(T_ecx); |
| 1297 _bundle_unlock(); | 1162 _bundle_unlock(); |
| 1298 if (RI->getSrcSize()) { | 1163 if (RI->getSrcSize()) { |
| 1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1164 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1300 Context.insert(InstFakeUse::create(Func, RetValue)); | 1165 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1301 } | 1166 } |
| 1302 RI->setDeleted(); | 1167 RI->setDeleted(); |
| 1303 } | 1168 } |
| 1304 | 1169 |
| 1305 void TargetX8632::split64(Variable *Var) { | 1170 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1306 switch (Var->getType()) { | 1171 switch (Var->getType()) { |
| 1307 default: | 1172 default: |
| 1308 return; | 1173 return; |
| 1309 case IceType_i64: | 1174 case IceType_i64: |
| 1310 // TODO: Only consider F64 if we need to push each half when | 1175 // TODO: Only consider F64 if we need to push each half when |
| 1311 // passing as an argument to a function call. Note that each half | 1176 // passing as an argument to a function call. Note that each half |
| 1312 // is still typed as I32. | 1177 // is still typed as I32. |
| 1313 case IceType_f64: | 1178 case IceType_f64: |
| 1314 break; | 1179 break; |
| 1315 } | 1180 } |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 1326 Lo->setName(Func, Var->getName(Func) + "__lo"); | 1191 Lo->setName(Func, Var->getName(Func) + "__lo"); |
| 1327 Hi->setName(Func, Var->getName(Func) + "__hi"); | 1192 Hi->setName(Func, Var->getName(Func) + "__hi"); |
| 1328 } | 1193 } |
| 1329 Var->setLoHi(Lo, Hi); | 1194 Var->setLoHi(Lo, Hi); |
| 1330 if (Var->getIsArg()) { | 1195 if (Var->getIsArg()) { |
| 1331 Lo->setIsArg(); | 1196 Lo->setIsArg(); |
| 1332 Hi->setIsArg(); | 1197 Hi->setIsArg(); |
| 1333 } | 1198 } |
| 1334 } | 1199 } |
| 1335 | 1200 |
| 1336 Operand *TargetX8632::loOperand(Operand *Operand) { | 1201 template <class Machine> |
| 1202 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { | |
| 1337 assert(Operand->getType() == IceType_i64 || | 1203 assert(Operand->getType() == IceType_i64 || |
| 1338 Operand->getType() == IceType_f64); | 1204 Operand->getType() == IceType_f64); |
| 1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 1205 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 1340 return Operand; | 1206 return Operand; |
| 1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1207 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 1342 split64(Var); | 1208 split64(Var); |
| 1343 return Var->getLo(); | 1209 return Var->getLo(); |
| 1344 } | 1210 } |
| 1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1211 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 1212 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 1213 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 1348 return legalize(ConstInt); | 1214 return legalize(ConstInt); |
| 1349 } | 1215 } |
| 1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | 1216 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { |
| 1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create( | 1217 OperandX8632Mem *MemOperand = OperandX8632Mem::create( |
| 1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 1218 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 1353 Mem->getShift(), Mem->getSegmentRegister()); | 1219 Mem->getShift(), Mem->getSegmentRegister()); |
| 1354 // Test if we should randomize or pool the offset, if so randomize it or | 1220 // Test if we should randomize or pool the offset, if so randomize it or |
| 1355 // pool it then create mem operand with the blinded/pooled constant. | 1221 // pool it then create mem operand with the blinded/pooled constant. |
| 1356 // Otherwise, return the mem operand as ordinary mem operand. | 1222 // Otherwise, return the mem operand as ordinary mem operand. |
| 1357 return legalize(MemOperand); | 1223 return legalize(MemOperand); |
| 1358 } | 1224 } |
| 1359 llvm_unreachable("Unsupported operand type"); | 1225 llvm_unreachable("Unsupported operand type"); |
| 1360 return nullptr; | 1226 return nullptr; |
| 1361 } | 1227 } |
| 1362 | 1228 |
| 1363 Operand *TargetX8632::hiOperand(Operand *Operand) { | 1229 template <class Machine> |
| 1230 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { | |
| 1364 assert(Operand->getType() == IceType_i64 || | 1231 assert(Operand->getType() == IceType_i64 || |
| 1365 Operand->getType() == IceType_f64); | 1232 Operand->getType() == IceType_f64); |
| 1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 1233 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 1367 return Operand; | 1234 return Operand; |
| 1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1235 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 1369 split64(Var); | 1236 split64(Var); |
| 1370 return Var->getHi(); | 1237 return Var->getHi(); |
| 1371 } | 1238 } |
| 1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1239 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 1240 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 1394 Mem->getShift(), Mem->getSegmentRegister()); | 1261 Mem->getShift(), Mem->getSegmentRegister()); |
| 1395 // Test if the Offset is an eligible i32 constants for randomization and | 1262 // Test if the Offset is an eligible i32 constants for randomization and |
| 1396 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem | 1263 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem |
| 1397 // operand. | 1264 // operand. |
| 1398 return legalize(MemOperand); | 1265 return legalize(MemOperand); |
| 1399 } | 1266 } |
| 1400 llvm_unreachable("Unsupported operand type"); | 1267 llvm_unreachable("Unsupported operand type"); |
| 1401 return nullptr; | 1268 return nullptr; |
| 1402 } | 1269 } |
| 1403 | 1270 |
| 1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | 1271 template <class Machine> |
| 1405 RegSetMask Exclude) const { | 1272 llvm::SmallBitVector |
| 1273 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | |
| 1274 RegSetMask Exclude) const { | |
| 1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); | 1275 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); |
| 1407 | 1276 |
| 1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 1277 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 1409 frameptr, isI8, isInt, isFP) \ | 1278 frameptr, isI8, isInt, isFP) \ |
| 1410 if (scratch && (Include & RegSet_CallerSave)) \ | 1279 if (scratch && (Include & RegSet_CallerSave)) \ |
| 1411 Registers[RegX8632::val] = true; \ | 1280 Registers[RegX8632::val] = true; \ |
| 1412 if (preserved && (Include & RegSet_CalleeSave)) \ | 1281 if (preserved && (Include & RegSet_CalleeSave)) \ |
| 1413 Registers[RegX8632::val] = true; \ | 1282 Registers[RegX8632::val] = true; \ |
| 1414 if (stackptr && (Include & RegSet_StackPointer)) \ | 1283 if (stackptr && (Include & RegSet_StackPointer)) \ |
| 1415 Registers[RegX8632::val] = true; \ | 1284 Registers[RegX8632::val] = true; \ |
| 1416 if (frameptr && (Include & RegSet_FramePointer)) \ | 1285 if (frameptr && (Include & RegSet_FramePointer)) \ |
| 1417 Registers[RegX8632::val] = true; \ | 1286 Registers[RegX8632::val] = true; \ |
| 1418 if (scratch && (Exclude & RegSet_CallerSave)) \ | 1287 if (scratch && (Exclude & RegSet_CallerSave)) \ |
| 1419 Registers[RegX8632::val] = false; \ | 1288 Registers[RegX8632::val] = false; \ |
| 1420 if (preserved && (Exclude & RegSet_CalleeSave)) \ | 1289 if (preserved && (Exclude & RegSet_CalleeSave)) \ |
| 1421 Registers[RegX8632::val] = false; \ | 1290 Registers[RegX8632::val] = false; \ |
| 1422 if (stackptr && (Exclude & RegSet_StackPointer)) \ | 1291 if (stackptr && (Exclude & RegSet_StackPointer)) \ |
| 1423 Registers[RegX8632::val] = false; \ | 1292 Registers[RegX8632::val] = false; \ |
| 1424 if (frameptr && (Exclude & RegSet_FramePointer)) \ | 1293 if (frameptr && (Exclude & RegSet_FramePointer)) \ |
| 1425 Registers[RegX8632::val] = false; | 1294 Registers[RegX8632::val] = false; |
| 1426 | 1295 |
| 1427 REGX8632_TABLE | 1296 REGX8632_TABLE |
| 1428 | 1297 |
| 1429 #undef X | 1298 #undef X |
| 1430 | 1299 |
| 1431 return Registers; | 1300 return Registers; |
| 1432 } | 1301 } |
| 1433 | 1302 |
| 1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 1303 template <class Machine> |
| 1304 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | |
| 1435 IsEbpBasedFrame = true; | 1305 IsEbpBasedFrame = true; |
| 1436 // Conservatively require the stack to be aligned. Some stack | 1306 // Conservatively require the stack to be aligned. Some stack |
| 1437 // adjustment operations implemented below assume that the stack is | 1307 // adjustment operations implemented below assume that the stack is |
| 1438 // aligned before the alloca. All the alloca code ensures that the | 1308 // aligned before the alloca. All the alloca code ensures that the |
| 1439 // stack alignment is preserved after the alloca. The stack alignment | 1309 // stack alignment is preserved after the alloca. The stack alignment |
| 1440 // restriction can be relaxed in some cases. | 1310 // restriction can be relaxed in some cases. |
| 1441 NeedsStackAlignment = true; | 1311 NeedsStackAlignment = true; |
| 1442 | 1312 |
| 1443 // TODO(stichnot): minimize the number of adjustments of esp, etc. | 1313 // TODO(stichnot): minimize the number of adjustments of esp, etc. |
| 1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 1314 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
| 1445 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1315 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 1446 Variable *Dest = Inst->getDest(); | 1316 Variable *Dest = Inst->getDest(); |
| 1447 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1317 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
| 1448 // For default align=0, set it to the real value 1, to avoid any | 1318 // For default align=0, set it to the real value 1, to avoid any |
| 1449 // bit-manipulation problems below. | 1319 // bit-manipulation problems below. |
| 1450 AlignmentParam = std::max(AlignmentParam, 1u); | 1320 AlignmentParam = std::max(AlignmentParam, 1u); |
| 1451 | 1321 |
| 1452 // LLVM enforces power of 2 alignment. | 1322 // LLVM enforces power of 2 alignment. |
| 1453 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1323 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES)); | 1324 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| 1455 | 1325 |
| 1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | 1326 uint32_t Alignment = |
| 1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | 1327 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 1328 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { | |
| 1458 _and(esp, Ctx->getConstantInt32(-Alignment)); | 1329 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| 1459 } | 1330 } |
| 1460 if (const auto *ConstantTotalSize = | 1331 if (const auto *ConstantTotalSize = |
| 1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1332 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1462 uint32_t Value = ConstantTotalSize->getValue(); | 1333 uint32_t Value = ConstantTotalSize->getValue(); |
| 1463 Value = Utils::applyAlignment(Value, Alignment); | 1334 Value = Utils::applyAlignment(Value, Alignment); |
| 1464 _sub(esp, Ctx->getConstantInt32(Value)); | 1335 _sub(esp, Ctx->getConstantInt32(Value)); |
| 1465 } else { | 1336 } else { |
| 1466 // Non-constant sizes need to be adjusted to the next highest | 1337 // Non-constant sizes need to be adjusted to the next highest |
| 1467 // multiple of the required alignment at runtime. | 1338 // multiple of the required alignment at runtime. |
| 1468 Variable *T = makeReg(IceType_i32); | 1339 Variable *T = makeReg(IceType_i32); |
| 1469 _mov(T, TotalSize); | 1340 _mov(T, TotalSize); |
| 1470 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1341 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1471 _and(T, Ctx->getConstantInt32(-Alignment)); | 1342 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1472 _sub(esp, T); | 1343 _sub(esp, T); |
| 1473 } | 1344 } |
| 1474 _mov(Dest, esp); | 1345 _mov(Dest, esp); |
| 1475 } | 1346 } |
| 1476 | 1347 |
| 1477 // Strength-reduce scalar integer multiplication by a constant (for | 1348 // Strength-reduce scalar integer multiplication by a constant (for |
| 1478 // i32 or narrower) for certain constants. The lea instruction can be | 1349 // i32 or narrower) for certain constants. The lea instruction can be |
| 1479 // used to multiply by 3, 5, or 9, and the lsh instruction can be used | 1350 // used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| 1480 // to multiply by powers of 2. These can be combined such that | 1351 // to multiply by powers of 2. These can be combined such that |
| 1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 1352 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1482 // combined with left-shifting by 2. | 1353 // combined with left-shifting by 2. |
| 1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1354 template <class Machine> |
| 1484 int32_t Src1) { | 1355 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1356 int32_t Src1) { | |
| 1485 // Disable this optimization for Om1 and O0, just to keep things | 1357 // Disable this optimization for Om1 and O0, just to keep things |
| 1486 // simple there. | 1358 // simple there. |
| 1487 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1359 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1488 return false; | 1360 return false; |
| 1489 Type Ty = Dest->getType(); | 1361 Type Ty = Dest->getType(); |
| 1490 Variable *T = nullptr; | 1362 Variable *T = nullptr; |
| 1491 if (Src1 == -1) { | 1363 if (Src1 == -1) { |
| 1492 _mov(T, Src0); | 1364 _mov(T, Src0); |
| 1493 _neg(T); | 1365 _neg(T); |
| 1494 _mov(Dest, T); | 1366 _mov(Dest, T); |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1563 } | 1435 } |
| 1564 if (Count2) { | 1436 if (Count2) { |
| 1565 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1437 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1566 } | 1438 } |
| 1567 if (Src1IsNegative) | 1439 if (Src1IsNegative) |
| 1568 _neg(T); | 1440 _neg(T); |
| 1569 _mov(Dest, T); | 1441 _mov(Dest, T); |
| 1570 return true; | 1442 return true; |
| 1571 } | 1443 } |
| 1572 | 1444 |
| 1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1445 template <class Machine> |
| 1446 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | |
| 1574 Variable *Dest = Inst->getDest(); | 1447 Variable *Dest = Inst->getDest(); |
| 1575 Operand *Src0 = legalize(Inst->getSrc(0)); | 1448 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1576 Operand *Src1 = legalize(Inst->getSrc(1)); | 1449 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1577 if (Inst->isCommutative()) { | 1450 if (Inst->isCommutative()) { |
| 1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1451 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
| 1579 std::swap(Src0, Src1); | 1452 std::swap(Src0, Src1); |
| 1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | 1453 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
| 1581 std::swap(Src0, Src1); | 1454 std::swap(Src0, Src1); |
| 1582 } | 1455 } |
| 1583 if (Dest->getType() == IceType_i64) { | 1456 if (Dest->getType() == IceType_i64) { |
| (...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1866 case InstArithmetic::Sub: { | 1739 case InstArithmetic::Sub: { |
| 1867 Variable *T = makeReg(Dest->getType()); | 1740 Variable *T = makeReg(Dest->getType()); |
| 1868 _movp(T, Src0); | 1741 _movp(T, Src0); |
| 1869 _psub(T, Src1); | 1742 _psub(T, Src1); |
| 1870 _movp(Dest, T); | 1743 _movp(Dest, T); |
| 1871 } break; | 1744 } break; |
| 1872 case InstArithmetic::Mul: { | 1745 case InstArithmetic::Mul: { |
| 1873 bool TypesAreValidForPmull = | 1746 bool TypesAreValidForPmull = |
| 1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1747 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
| 1875 bool InstructionSetIsValidForPmull = | 1748 bool InstructionSetIsValidForPmull = |
| 1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; | 1749 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1; |
| 1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1750 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1878 Variable *T = makeReg(Dest->getType()); | 1751 Variable *T = makeReg(Dest->getType()); |
| 1879 _movp(T, Src0); | 1752 _movp(T, Src0); |
| 1880 _pmull(T, Src1); | 1753 _pmull(T, Src1); |
| 1881 _movp(Dest, T); | 1754 _movp(Dest, T); |
| 1882 } else if (Dest->getType() == IceType_v4i32) { | 1755 } else if (Dest->getType() == IceType_v4i32) { |
| 1883 // Lowering sequence: | 1756 // Lowering sequence: |
| 1884 // Note: The mask arguments have index 0 on the left. | 1757 // Note: The mask arguments have index 0 on the left. |
| 1885 // | 1758 // |
| 1886 // movups T1, Src0 | 1759 // movups T1, Src0 |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1933 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 2061 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1934 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 2062 Type Ty = Dest->getType(); | 1935 Type Ty = Dest->getType(); |
| 2063 // LLVM does the following for dest=src/(1<<log): | 1936 // LLVM does the following for dest=src/(1<<log): |
| 2064 // t=src | 1937 // t=src |
| 2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1938 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 2066 // shr t,typewidth-log | 1939 // shr t,typewidth-log |
| 2067 // add t,src | 1940 // add t,src |
| 2068 // sar t,log | 1941 // sar t,log |
| 2069 // dest=t | 1942 // dest=t |
| 2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | 1943 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 2071 _mov(T, Src0); | 1944 _mov(T, Src0); |
| 2072 // If for some reason we are dividing by 1, just treat it | 1945 // If for some reason we are dividing by 1, just treat it |
| 2073 // like an assignment. | 1946 // like an assignment. |
| 2074 if (LogDiv > 0) { | 1947 if (LogDiv > 0) { |
| 2075 // The initial sar is unnecessary when dividing by 2. | 1948 // The initial sar is unnecessary when dividing by 2. |
| 2076 if (LogDiv > 1) | 1949 if (LogDiv > 1) |
| 2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 1950 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1951 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 2079 _add(T, Src0); | 1952 _add(T, Src0); |
| 2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1953 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2129 Type Ty = Dest->getType(); | 2002 Type Ty = Dest->getType(); |
| 2130 // LLVM does the following for dest=src%(1<<log): | 2003 // LLVM does the following for dest=src%(1<<log): |
| 2131 // t=src | 2004 // t=src |
| 2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2005 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 2133 // shr t,typewidth-log | 2006 // shr t,typewidth-log |
| 2134 // add t,src | 2007 // add t,src |
| 2135 // and t, -(1<<log) | 2008 // and t, -(1<<log) |
| 2136 // sub t,src | 2009 // sub t,src |
| 2137 // neg t | 2010 // neg t |
| 2138 // dest=t | 2011 // dest=t |
| 2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | 2012 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 2140 // If for some reason we are dividing by 1, just assign 0. | 2013 // If for some reason we are dividing by 1, just assign 0. |
| 2141 if (LogDiv == 0) { | 2014 if (LogDiv == 0) { |
| 2142 _mov(Dest, Ctx->getConstantZero(Ty)); | 2015 _mov(Dest, Ctx->getConstantZero(Ty)); |
| 2143 return; | 2016 return; |
| 2144 } | 2017 } |
| 2145 _mov(T, Src0); | 2018 _mov(T, Src0); |
| 2146 // The initial sar is unnecessary when dividing by 2. | 2019 // The initial sar is unnecessary when dividing by 2. |
| 2147 if (LogDiv > 1) | 2020 if (LogDiv > 1) |
| 2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 2021 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 2022 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2197 Type Ty = Dest->getType(); | 2070 Type Ty = Dest->getType(); |
| 2198 InstCall *Call = makeHelperCall( | 2071 InstCall *Call = makeHelperCall( |
| 2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 2072 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
| 2200 Call->addArg(Src0); | 2073 Call->addArg(Src0); |
| 2201 Call->addArg(Src1); | 2074 Call->addArg(Src1); |
| 2202 return lowerCall(Call); | 2075 return lowerCall(Call); |
| 2203 } | 2076 } |
| 2204 } | 2077 } |
| 2205 } | 2078 } |
| 2206 | 2079 |
| 2207 void TargetX8632::lowerAssign(const InstAssign *Inst) { | 2080 template <class Machine> |
| 2081 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | |
| 2208 Variable *Dest = Inst->getDest(); | 2082 Variable *Dest = Inst->getDest(); |
| 2209 Operand *Src0 = Inst->getSrc(0); | 2083 Operand *Src0 = Inst->getSrc(0); |
| 2210 assert(Dest->getType() == Src0->getType()); | 2084 assert(Dest->getType() == Src0->getType()); |
| 2211 if (Dest->getType() == IceType_i64) { | 2085 if (Dest->getType() == IceType_i64) { |
| 2212 Src0 = legalize(Src0); | 2086 Src0 = legalize(Src0); |
| 2213 Operand *Src0Lo = loOperand(Src0); | 2087 Operand *Src0Lo = loOperand(Src0); |
| 2214 Operand *Src0Hi = hiOperand(Src0); | 2088 Operand *Src0Hi = hiOperand(Src0); |
| 2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2089 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2090 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2217 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2091 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 2242 // register or a scalar integer immediate. | 2116 // register or a scalar integer immediate. |
| 2243 RI = legalize(Src0, Legal_Reg | Legal_Imm); | 2117 RI = legalize(Src0, Legal_Reg | Legal_Imm); |
| 2244 } | 2118 } |
| 2245 if (isVectorType(Dest->getType())) | 2119 if (isVectorType(Dest->getType())) |
| 2246 _movp(Dest, RI); | 2120 _movp(Dest, RI); |
| 2247 else | 2121 else |
| 2248 _mov(Dest, RI); | 2122 _mov(Dest, RI); |
| 2249 } | 2123 } |
| 2250 } | 2124 } |
| 2251 | 2125 |
| 2252 void TargetX8632::lowerBr(const InstBr *Inst) { | 2126 template <class Machine> |
| 2127 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { | |
| 2253 if (Inst->isUnconditional()) { | 2128 if (Inst->isUnconditional()) { |
| 2254 _br(Inst->getTargetUnconditional()); | 2129 _br(Inst->getTargetUnconditional()); |
| 2255 return; | 2130 return; |
| 2256 } | 2131 } |
| 2257 Operand *Cond = Inst->getCondition(); | 2132 Operand *Cond = Inst->getCondition(); |
| 2258 | 2133 |
| 2259 // Handle folding opportunities. | 2134 // Handle folding opportunities. |
| 2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 2135 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| 2261 assert(Producer->isDeleted()); | 2136 assert(Producer->isDeleted()); |
| 2262 switch (BoolFolding::getProducerKind(Producer)) { | 2137 switch (BoolFolding::getProducerKind(Producer)) { |
| 2263 default: | 2138 default: |
| 2264 break; | 2139 break; |
| 2265 case BoolFolding::PK_Icmp32: { | 2140 case BoolFolding::PK_Icmp32: { |
| 2266 // TODO(stichnot): Refactor similarities between this block and | 2141 // TODO(stichnot): Refactor similarities between this block and |
| 2267 // the corresponding code in lowerIcmp(). | 2142 // the corresponding code in lowerIcmp(). |
| 2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 2143 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| 2269 Operand *Src0 = Producer->getSrc(0); | 2144 Operand *Src0 = Producer->getSrc(0); |
| 2270 Operand *Src1 = legalize(Producer->getSrc(1)); | 2145 Operand *Src1 = legalize(Producer->getSrc(1)); |
| 2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2146 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2272 _cmp(Src0RM, Src1); | 2147 _cmp(Src0RM, Src1); |
| 2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), | 2148 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
| 2274 Inst->getTargetFalse()); | 2149 Inst->getTargetFalse()); |
| 2275 return; | 2150 return; |
| 2276 } | 2151 } |
| 2277 } | 2152 } |
| 2278 } | 2153 } |
| 2279 | 2154 |
| 2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2155 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 2281 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2156 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2282 _cmp(Src0, Zero); | 2157 _cmp(Src0, Zero); |
| 2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 2158 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 2284 } | 2159 } |
| 2285 | 2160 |
| 2286 void TargetX8632::lowerCall(const InstCall *Instr) { | 2161 template <class Machine> |
| 2162 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { | |
| 2287 // x86-32 calling convention: | 2163 // x86-32 calling convention: |
| 2288 // | 2164 // |
| 2289 // * At the point before the call, the stack must be aligned to 16 | 2165 // * At the point before the call, the stack must be aligned to 16 |
| 2290 // bytes. | 2166 // bytes. |
| 2291 // | 2167 // |
| 2292 // * The first four arguments of vector type, regardless of their | 2168 // * The first four arguments of vector type, regardless of their |
| 2293 // position relative to the other arguments in the argument list, are | 2169 // position relative to the other arguments in the argument list, are |
| 2294 // placed in registers xmm0 - xmm3. | 2170 // placed in registers xmm0 - xmm3. |
| 2295 // | 2171 // |
| 2296 // * Other arguments are pushed onto the stack in right-to-left order, | 2172 // * Other arguments are pushed onto the stack in right-to-left order, |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 2311 OperandList StackArgs, StackArgLocations; | 2187 OperandList StackArgs, StackArgLocations; |
| 2312 uint32_t ParameterAreaSizeBytes = 0; | 2188 uint32_t ParameterAreaSizeBytes = 0; |
| 2313 | 2189 |
| 2314 // Classify each argument operand according to the location where the | 2190 // Classify each argument operand according to the location where the |
| 2315 // argument is passed. | 2191 // argument is passed. |
| 2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 2192 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 2317 Operand *Arg = Instr->getArg(i); | 2193 Operand *Arg = Instr->getArg(i); |
| 2318 Type Ty = Arg->getType(); | 2194 Type Ty = Arg->getType(); |
| 2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 2195 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 2320 assert(typeWidthInBytes(Ty) >= 4); | 2196 assert(typeWidthInBytes(Ty) >= 4); |
| 2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 2197 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 2322 XmmArgs.push_back(Arg); | 2198 XmmArgs.push_back(Arg); |
| 2323 } else { | 2199 } else { |
| 2324 StackArgs.push_back(Arg); | 2200 StackArgs.push_back(Arg); |
| 2325 if (isVectorType(Arg->getType())) { | 2201 if (isVectorType(Arg->getType())) { |
| 2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 2202 ParameterAreaSizeBytes = |
| 2203 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 2327 } | 2204 } |
| 2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 2205 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
| 2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 2206 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | 2207 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
| 2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 2208 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 2332 } | 2209 } |
| 2333 } | 2210 } |
| 2334 | 2211 |
| 2335 // Adjust the parameter area so that the stack is aligned. It is | 2212 // Adjust the parameter area so that the stack is aligned. It is |
| 2336 // assumed that the stack is already aligned at the start of the | 2213 // assumed that the stack is already aligned at the start of the |
| 2337 // calling sequence. | 2214 // calling sequence. |
| 2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 2215 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 2339 | 2216 |
| 2340 // Subtract the appropriate amount for the argument area. This also | 2217 // Subtract the appropriate amount for the argument area. This also |
| 2341 // takes care of setting the stack adjustment during emission. | 2218 // takes care of setting the stack adjustment during emission. |
| 2342 // | 2219 // |
| 2343 // TODO: If for some reason the call instruction gets dead-code | 2220 // TODO: If for some reason the call instruction gets dead-code |
| 2344 // eliminated after lowering, we would need to ensure that the | 2221 // eliminated after lowering, we would need to ensure that the |
| 2345 // pre-call and the post-call esp adjustment get eliminated as well. | 2222 // pre-call and the post-call esp adjustment get eliminated as well. |
| 2346 if (ParameterAreaSizeBytes) { | 2223 if (ParameterAreaSizeBytes) { |
| 2347 _adjust_stack(ParameterAreaSizeBytes); | 2224 _adjust_stack(ParameterAreaSizeBytes); |
| 2348 } | 2225 } |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2411 Operand *CallTarget = legalize(Instr->getCallTarget()); | 2288 Operand *CallTarget = legalize(Instr->getCallTarget()); |
| 2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 2289 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 2413 if (NeedSandboxing) { | 2290 if (NeedSandboxing) { |
| 2414 if (llvm::isa<Constant>(CallTarget)) { | 2291 if (llvm::isa<Constant>(CallTarget)) { |
| 2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2292 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2416 } else { | 2293 } else { |
| 2417 Variable *CallTargetVar = nullptr; | 2294 Variable *CallTargetVar = nullptr; |
| 2418 _mov(CallTargetVar, CallTarget); | 2295 _mov(CallTargetVar, CallTarget); |
| 2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2296 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2420 const SizeT BundleSize = | 2297 const SizeT BundleSize = |
| 2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 2298 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | 2299 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 2423 CallTarget = CallTargetVar; | 2300 CallTarget = CallTargetVar; |
| 2424 } | 2301 } |
| 2425 } | 2302 } |
| 2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 2303 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
| 2427 Context.insert(NewCall); | 2304 Context.insert(NewCall); |
| 2428 if (NeedSandboxing) | 2305 if (NeedSandboxing) |
| 2429 _bundle_unlock(); | 2306 _bundle_unlock(); |
| 2430 if (ReturnRegHi) | 2307 if (ReturnRegHi) |
| 2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 2308 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2473 // st(0). | 2350 // st(0). |
| 2474 // If Dest ends up being a physical xmm register, the fstp emit code | 2351 // If Dest ends up being a physical xmm register, the fstp emit code |
| 2475 // will route st(0) through a temporary stack slot. | 2352 // will route st(0) through a temporary stack slot. |
| 2476 _fstp(Dest); | 2353 _fstp(Dest); |
| 2477 // Create a fake use of Dest in case it actually isn't used, | 2354 // Create a fake use of Dest in case it actually isn't used, |
| 2478 // because st(0) still needs to be popped. | 2355 // because st(0) still needs to be popped. |
| 2479 Context.insert(InstFakeUse::create(Func, Dest)); | 2356 Context.insert(InstFakeUse::create(Func, Dest)); |
| 2480 } | 2357 } |
| 2481 } | 2358 } |
| 2482 | 2359 |
| 2483 void TargetX8632::lowerCast(const InstCast *Inst) { | 2360 template <class Machine> |
| 2361 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | |
| 2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2362 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 2485 InstCast::OpKind CastKind = Inst->getCastKind(); | 2363 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 2486 Variable *Dest = Inst->getDest(); | 2364 Variable *Dest = Inst->getDest(); |
| 2487 switch (CastKind) { | 2365 switch (CastKind) { |
| 2488 default: | 2366 default: |
| 2489 Func->setError("Cast type not supported"); | 2367 Func->setError("Cast type not supported"); |
| 2490 return; | 2368 return; |
| 2491 case InstCast::Sext: { | 2369 case InstCast::Sext: { |
| 2492 // Src0RM is the source operand legalized to physical register or memory, | 2370 // Src0RM is the source operand legalized to physical register or memory, |
| 2493 // but not immediate, since the relevant x86 native instructions don't | 2371 // but not immediate, since the relevant x86 native instructions don't |
| 2494 // allow an immediate operand. If the operand is an immediate, we could | 2372 // allow an immediate operand. If the operand is an immediate, we could |
| 2495 // consider computing the strength-reduced result at translation time, | 2373 // consider computing the strength-reduced result at translation time, |
| 2496 // but we're unlikely to see something like that in the bitcode that | 2374 // but we're unlikely to see something like that in the bitcode that |
| 2497 // the optimizer wouldn't have already taken care of. | 2375 // the optimizer wouldn't have already taken care of. |
| 2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2376 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2499 if (isVectorType(Dest->getType())) { | 2377 if (isVectorType(Dest->getType())) { |
| 2500 Type DestTy = Dest->getType(); | 2378 Type DestTy = Dest->getType(); |
| 2501 if (DestTy == IceType_v16i8) { | 2379 if (DestTy == IceType_v16i8) { |
| 2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 2380 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2503 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 2381 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
| 2504 Variable *T = makeReg(DestTy); | 2382 Variable *T = makeReg(DestTy); |
| 2505 _movp(T, Src0RM); | 2383 _movp(T, Src0RM); |
| 2506 _pand(T, OneMask); | 2384 _pand(T, OneMask); |
| 2507 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 2385 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
| 2508 _pcmpgt(T, Zeros); | 2386 _pcmpgt(T, Zeros); |
| 2509 _movp(Dest, T); | 2387 _movp(Dest, T); |
| 2510 } else { | 2388 } else { |
| 2511 // width = width(elty) - 1; dest = (src << width) >> width | 2389 // width = width(elty) - 1; dest = (src << width) >> width |
| 2512 SizeT ShiftAmount = | 2390 SizeT ShiftAmount = |
| 2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; | 2391 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2392 1; | |
| 2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 2393 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2515 Variable *T = makeReg(DestTy); | 2394 Variable *T = makeReg(DestTy); |
| 2516 _movp(T, Src0RM); | 2395 _movp(T, Src0RM); |
| 2517 _psll(T, ShiftConstant); | 2396 _psll(T, ShiftConstant); |
| 2518 _psra(T, ShiftConstant); | 2397 _psra(T, ShiftConstant); |
| 2519 _movp(Dest, T); | 2398 _movp(Dest, T); |
| 2520 } | 2399 } |
| 2521 } else if (Dest->getType() == IceType_i64) { | 2400 } else if (Dest->getType() == IceType_i64) { |
| 2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 2401 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
| 2523 Constant *Shift = Ctx->getConstantInt32(31); | 2402 Constant *Shift = Ctx->getConstantInt32(31); |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 2538 _mov(T_Hi, T_Lo); | 2417 _mov(T_Hi, T_Lo); |
| 2539 if (Src0RM->getType() != IceType_i1) | 2418 if (Src0RM->getType() != IceType_i1) |
| 2540 // For i1, the sar instruction is already done above. | 2419 // For i1, the sar instruction is already done above. |
| 2541 _sar(T_Hi, Shift); | 2420 _sar(T_Hi, Shift); |
| 2542 _mov(DestHi, T_Hi); | 2421 _mov(DestHi, T_Hi); |
| 2543 } else if (Src0RM->getType() == IceType_i1) { | 2422 } else if (Src0RM->getType() == IceType_i1) { |
| 2544 // t1 = src | 2423 // t1 = src |
| 2545 // shl t1, dst_bitwidth - 1 | 2424 // shl t1, dst_bitwidth - 1 |
| 2546 // sar t1, dst_bitwidth - 1 | 2425 // sar t1, dst_bitwidth - 1 |
| 2547 // dst = t1 | 2426 // dst = t1 |
| 2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | 2427 size_t DestBits = |
| 2428 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | |
| 2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); | 2429 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); |
| 2550 Variable *T = makeReg(Dest->getType()); | 2430 Variable *T = makeReg(Dest->getType()); |
| 2551 if (typeWidthInBytes(Dest->getType()) <= | 2431 if (typeWidthInBytes(Dest->getType()) <= |
| 2552 typeWidthInBytes(Src0RM->getType())) { | 2432 typeWidthInBytes(Src0RM->getType())) { |
| 2553 _mov(T, Src0RM); | 2433 _mov(T, Src0RM); |
| 2554 } else { | 2434 } else { |
| 2555 // Widen the source using movsx or movzx. (It doesn't matter | 2435 // Widen the source using movsx or movzx. (It doesn't matter |
| 2556 // which one, since the following shl/sar overwrite the bits.) | 2436 // which one, since the following shl/sar overwrite the bits.) |
| 2557 _movzx(T, Src0RM); | 2437 _movzx(T, Src0RM); |
| 2558 } | 2438 } |
| (...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2943 case IceType_v4i32: | 2823 case IceType_v4i32: |
| 2944 case IceType_v4f32: { | 2824 case IceType_v4f32: { |
| 2945 _movp(Dest, legalizeToVar(Src0)); | 2825 _movp(Dest, legalizeToVar(Src0)); |
| 2946 } break; | 2826 } break; |
| 2947 } | 2827 } |
| 2948 break; | 2828 break; |
| 2949 } | 2829 } |
| 2950 } | 2830 } |
| 2951 } | 2831 } |
| 2952 | 2832 |
| 2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | 2833 template <class Machine> |
| 2834 void TargetX86Base<Machine>::lowerExtractElement( | |
| 2835 const InstExtractElement *Inst) { | |
| 2954 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2836 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2955 ConstantInteger32 *ElementIndex = | 2837 ConstantInteger32 *ElementIndex = |
| 2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2838 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
| 2957 // Only constant indices are allowed in PNaCl IR. | 2839 // Only constant indices are allowed in PNaCl IR. |
| 2958 assert(ElementIndex); | 2840 assert(ElementIndex); |
| 2959 | 2841 |
| 2960 unsigned Index = ElementIndex->getValue(); | 2842 unsigned Index = ElementIndex->getValue(); |
| 2961 Type Ty = SourceVectNotLegalized->getType(); | 2843 Type Ty = SourceVectNotLegalized->getType(); |
| 2962 Type ElementTy = typeElementType(Ty); | 2844 Type ElementTy = typeElementType(Ty); |
| 2963 Type InVectorElementTy = getInVectorElementType(Ty); | 2845 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 2964 Variable *ExtractedElementR = makeReg(InVectorElementTy); | 2846 Variable *ExtractedElementR = makeReg(InVectorElementTy); |
| 2965 | 2847 |
| 2966 // TODO(wala): Determine the best lowering sequences for each type. | 2848 // TODO(wala): Determine the best lowering sequences for each type. |
| 2967 bool CanUsePextr = | 2849 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2850 InstructionSet >= Machine::SSE4_1; |
| 2969 if (CanUsePextr && Ty != IceType_v4f32) { | 2851 if (CanUsePextr && Ty != IceType_v4f32) { |
| 2970 // Use pextrb, pextrw, or pextrd. | 2852 // Use pextrb, pextrw, or pextrd. |
| 2971 Constant *Mask = Ctx->getConstantInt32(Index); | 2853 Constant *Mask = Ctx->getConstantInt32(Index); |
| 2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2854 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
| 2973 _pextr(ExtractedElementR, SourceVectR, Mask); | 2855 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2856 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2975 // Use pshufd and movd/movss. | 2857 // Use pshufd and movd/movss. |
| 2976 Variable *T = nullptr; | 2858 Variable *T = nullptr; |
| 2977 if (Index) { | 2859 if (Index) { |
| 2978 // The shuffle only needs to occur if the element to be extracted | 2860 // The shuffle only needs to occur if the element to be extracted |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); | 2901 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); |
| 3020 lowerCast(Cast); | 2902 lowerCast(Cast); |
| 3021 ExtractedElementR = T; | 2903 ExtractedElementR = T; |
| 3022 } | 2904 } |
| 3023 | 2905 |
| 3024 // Copy the element to the destination. | 2906 // Copy the element to the destination. |
| 3025 Variable *Dest = Inst->getDest(); | 2907 Variable *Dest = Inst->getDest(); |
| 3026 _mov(Dest, ExtractedElementR); | 2908 _mov(Dest, ExtractedElementR); |
| 3027 } | 2909 } |
| 3028 | 2910 |
| 3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 2911 template <class Machine> |
| 2912 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { | |
| 3030 Operand *Src0 = Inst->getSrc(0); | 2913 Operand *Src0 = Inst->getSrc(0); |
| 3031 Operand *Src1 = Inst->getSrc(1); | 2914 Operand *Src1 = Inst->getSrc(1); |
| 3032 Variable *Dest = Inst->getDest(); | 2915 Variable *Dest = Inst->getDest(); |
| 3033 | 2916 |
| 3034 if (isVectorType(Dest->getType())) { | 2917 if (isVectorType(Dest->getType())) { |
| 3035 InstFcmp::FCond Condition = Inst->getCondition(); | 2918 InstFcmp::FCond Condition = Inst->getCondition(); |
| 3036 size_t Index = static_cast<size_t>(Condition); | 2919 size_t Index = static_cast<size_t>(Condition); |
| 3037 assert(Index < TableFcmpSize); | 2920 assert(Index < Traits::TableFcmpSize); |
| 3038 | 2921 |
| 3039 if (TableFcmp[Index].SwapVectorOperands) { | 2922 if (Traits::TableFcmp[Index].SwapVectorOperands) { |
| 3040 Operand *T = Src0; | 2923 Operand *T = Src0; |
| 3041 Src0 = Src1; | 2924 Src0 = Src1; |
| 3042 Src1 = T; | 2925 Src1 = T; |
| 3043 } | 2926 } |
| 3044 | 2927 |
| 3045 Variable *T = nullptr; | 2928 Variable *T = nullptr; |
| 3046 | 2929 |
| 3047 if (Condition == InstFcmp::True) { | 2930 if (Condition == InstFcmp::True) { |
| 3048 // makeVectorOfOnes() requires an integer vector type. | 2931 // makeVectorOfOnes() requires an integer vector type. |
| 3049 T = makeVectorOfMinusOnes(IceType_v4i32); | 2932 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 3050 } else if (Condition == InstFcmp::False) { | 2933 } else if (Condition == InstFcmp::False) { |
| 3051 T = makeVectorOfZeros(Dest->getType()); | 2934 T = makeVectorOfZeros(Dest->getType()); |
| 3052 } else { | 2935 } else { |
| 3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2936 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2937 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 3055 if (llvm::isa<OperandX8632Mem>(Src1RM)) | 2938 if (llvm::isa<OperandX8632Mem>(Src1RM)) |
| 3056 Src1RM = legalizeToVar(Src1RM); | 2939 Src1RM = legalizeToVar(Src1RM); |
| 3057 | 2940 |
| 3058 switch (Condition) { | 2941 switch (Condition) { |
| 3059 default: { | 2942 default: { |
| 3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2943 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; |
| 3061 assert(Predicate != CondX86::Cmpps_Invalid); | 2944 assert(Predicate != CondX86::Cmpps_Invalid); |
| 3062 T = makeReg(Src0RM->getType()); | 2945 T = makeReg(Src0RM->getType()); |
| 3063 _movp(T, Src0RM); | 2946 _movp(T, Src0RM); |
| 3064 _cmpps(T, Src1RM, Predicate); | 2947 _cmpps(T, Src1RM, Predicate); |
| 3065 } break; | 2948 } break; |
| 3066 case InstFcmp::One: { | 2949 case InstFcmp::One: { |
| 3067 // Check both unequal and ordered. | 2950 // Check both unequal and ordered. |
| 3068 T = makeReg(Src0RM->getType()); | 2951 T = makeReg(Src0RM->getType()); |
| 3069 Variable *T2 = makeReg(Src0RM->getType()); | 2952 Variable *T2 = makeReg(Src0RM->getType()); |
| 3070 _movp(T, Src0RM); | 2953 _movp(T, Src0RM); |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 3099 // j<C2> label /* only if C2 != Br_None */ | 2982 // j<C2> label /* only if C2 != Br_None */ |
| 3100 // FakeUse(a) /* only if C1 != Br_None */ | 2983 // FakeUse(a) /* only if C1 != Br_None */ |
| 3101 // mov a, !<default> /* only if C1 != Br_None */ | 2984 // mov a, !<default> /* only if C1 != Br_None */ |
| 3102 // label: /* only if C1 != Br_None */ | 2985 // label: /* only if C1 != Br_None */ |
| 3103 // | 2986 // |
| 3104 // setcc lowering when C1 != Br_None && C2 == Br_None: | 2987 // setcc lowering when C1 != Br_None && C2 == Br_None: |
| 3105 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ | 2988 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ |
| 3106 // setcc a, C1 | 2989 // setcc a, C1 |
| 3107 InstFcmp::FCond Condition = Inst->getCondition(); | 2990 InstFcmp::FCond Condition = Inst->getCondition(); |
| 3108 size_t Index = static_cast<size_t>(Condition); | 2991 size_t Index = static_cast<size_t>(Condition); |
| 3109 assert(Index < TableFcmpSize); | 2992 assert(Index < Traits::TableFcmpSize); |
| 3110 if (TableFcmp[Index].SwapScalarOperands) | 2993 if (Traits::TableFcmp[Index].SwapScalarOperands) |
| 3111 std::swap(Src0, Src1); | 2994 std::swap(Src0, Src1); |
| 3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); | 2995 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None); |
| 3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); | 2996 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None); |
| 3114 if (HasC1) { | 2997 if (HasC1) { |
| 3115 Src0 = legalize(Src0); | 2998 Src0 = legalize(Src0); |
| 3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2999 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 3117 Variable *T = nullptr; | 3000 Variable *T = nullptr; |
| 3118 _mov(T, Src0); | 3001 _mov(T, Src0); |
| 3119 _ucomiss(T, Src1RM); | 3002 _ucomiss(T, Src1RM); |
| 3120 if (!HasC2) { | 3003 if (!HasC2) { |
| 3121 assert(TableFcmp[Index].Default); | 3004 assert(Traits::TableFcmp[Index].Default); |
| 3122 _setcc(Dest, TableFcmp[Index].C1); | 3005 _setcc(Dest, Traits::TableFcmp[Index].C1); |
| 3123 return; | 3006 return; |
| 3124 } | 3007 } |
| 3125 } | 3008 } |
| 3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); | 3009 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default); |
| 3127 _mov(Dest, Default); | 3010 _mov(Dest, Default); |
| 3128 if (HasC1) { | 3011 if (HasC1) { |
| 3129 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3012 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 3130 _br(TableFcmp[Index].C1, Label); | 3013 _br(Traits::TableFcmp[Index].C1, Label); |
| 3131 if (HasC2) { | 3014 if (HasC2) { |
| 3132 _br(TableFcmp[Index].C2, Label); | 3015 _br(Traits::TableFcmp[Index].C2, Label); |
| 3133 } | 3016 } |
| 3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); | 3017 Constant *NonDefault = |
| 3018 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default); | |
| 3135 _mov_nonkillable(Dest, NonDefault); | 3019 _mov_nonkillable(Dest, NonDefault); |
| 3136 Context.insert(Label); | 3020 Context.insert(Label); |
| 3137 } | 3021 } |
| 3138 } | 3022 } |
| 3139 | 3023 |
| 3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 3024 template <class Machine> |
| 3025 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { | |
| 3141 Operand *Src0 = legalize(Inst->getSrc(0)); | 3026 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 3142 Operand *Src1 = legalize(Inst->getSrc(1)); | 3027 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 3143 Variable *Dest = Inst->getDest(); | 3028 Variable *Dest = Inst->getDest(); |
| 3144 | 3029 |
| 3145 if (isVectorType(Dest->getType())) { | 3030 if (isVectorType(Dest->getType())) { |
| 3146 Type Ty = Src0->getType(); | 3031 Type Ty = Src0->getType(); |
| 3147 // Promote i1 vectors to 128 bit integer vector types. | 3032 // Promote i1 vectors to 128 bit integer vector types. |
| 3148 if (typeElementType(Ty) == IceType_i1) { | 3033 if (typeElementType(Ty) == IceType_i1) { |
| 3149 Type NewTy = IceType_NUM; | 3034 Type NewTy = IceType_NUM; |
| 3150 switch (Ty) { | 3035 switch (Ty) { |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3248 | 3133 |
| 3249 _movp(Dest, T); | 3134 _movp(Dest, T); |
| 3250 eliminateNextVectorSextInstruction(Dest); | 3135 eliminateNextVectorSextInstruction(Dest); |
| 3251 return; | 3136 return; |
| 3252 } | 3137 } |
| 3253 | 3138 |
| 3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 3139 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| 3255 if (Src0->getType() == IceType_i64) { | 3140 if (Src0->getType() == IceType_i64) { |
| 3256 InstIcmp::ICond Condition = Inst->getCondition(); | 3141 InstIcmp::ICond Condition = Inst->getCondition(); |
| 3257 size_t Index = static_cast<size_t>(Condition); | 3142 size_t Index = static_cast<size_t>(Condition); |
| 3258 assert(Index < TableIcmp64Size); | 3143 assert(Index < Traits::TableIcmp64Size); |
| 3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | 3144 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
| 3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | 3145 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
| 3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 3146 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 3147 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 3263 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3148 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3264 Constant *One = Ctx->getConstantInt32(1); | 3149 Constant *One = Ctx->getConstantInt32(1); |
| 3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | 3150 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
| 3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | 3151 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
| 3267 _mov(Dest, One); | 3152 _mov(Dest, One); |
| 3268 _cmp(Src0HiRM, Src1HiRI); | 3153 _cmp(Src0HiRM, Src1HiRI); |
| 3269 if (TableIcmp64[Index].C1 != CondX86::Br_None) | 3154 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None) |
| 3270 _br(TableIcmp64[Index].C1, LabelTrue); | 3155 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 3271 if (TableIcmp64[Index].C2 != CondX86::Br_None) | 3156 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None) |
| 3272 _br(TableIcmp64[Index].C2, LabelFalse); | 3157 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 3273 _cmp(Src0LoRM, Src1LoRI); | 3158 _cmp(Src0LoRM, Src1LoRI); |
| 3274 _br(TableIcmp64[Index].C3, LabelTrue); | 3159 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 3275 Context.insert(LabelFalse); | 3160 Context.insert(LabelFalse); |
| 3276 _mov_nonkillable(Dest, Zero); | 3161 _mov_nonkillable(Dest, Zero); |
| 3277 Context.insert(LabelTrue); | 3162 Context.insert(LabelTrue); |
| 3278 return; | 3163 return; |
| 3279 } | 3164 } |
| 3280 | 3165 |
| 3281 // cmp b, c | 3166 // cmp b, c |
| 3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 3167 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 3283 _cmp(Src0RM, Src1); | 3168 _cmp(Src0RM, Src1); |
| 3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition())); | 3169 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); |
| 3285 } | 3170 } |
| 3286 | 3171 |
| 3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 3172 template <class Machine> |
| 3173 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | |
| 3288 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3174 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3175 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 3290 ConstantInteger32 *ElementIndex = | 3176 ConstantInteger32 *ElementIndex = |
| 3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3177 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 3292 // Only constant indices are allowed in PNaCl IR. | 3178 // Only constant indices are allowed in PNaCl IR. |
| 3293 assert(ElementIndex); | 3179 assert(ElementIndex); |
| 3294 unsigned Index = ElementIndex->getValue(); | 3180 unsigned Index = ElementIndex->getValue(); |
| 3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 3181 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
| 3296 | 3182 |
| 3297 Type Ty = SourceVectNotLegalized->getType(); | 3183 Type Ty = SourceVectNotLegalized->getType(); |
| 3298 Type ElementTy = typeElementType(Ty); | 3184 Type ElementTy = typeElementType(Ty); |
| 3299 Type InVectorElementTy = getInVectorElementType(Ty); | 3185 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 3300 | 3186 |
| 3301 if (ElementTy == IceType_i1) { | 3187 if (ElementTy == IceType_i1) { |
| 3302 // Expand the element to the appropriate size for it to be inserted | 3188 // Expand the element to the appropriate size for it to be inserted |
| 3303 // in the vector. | 3189 // in the vector. |
| 3304 Variable *Expanded = Func->makeVariable(InVectorElementTy); | 3190 Variable *Expanded = Func->makeVariable(InVectorElementTy); |
| 3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, | 3191 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, |
| 3306 ElementToInsertNotLegalized); | 3192 ElementToInsertNotLegalized); |
| 3307 lowerCast(Cast); | 3193 lowerCast(Cast); |
| 3308 ElementToInsertNotLegalized = Expanded; | 3194 ElementToInsertNotLegalized = Expanded; |
| 3309 } | 3195 } |
| 3310 | 3196 |
| 3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { | 3197 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 3198 InstructionSet >= Machine::SSE4_1) { | |
| 3312 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3199 // Use insertps, pinsrb, pinsrw, or pinsrd. |
| 3313 Operand *ElementRM = | 3200 Operand *ElementRM = |
| 3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3201 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 3315 Operand *SourceVectRM = | 3202 Operand *SourceVectRM = |
| 3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3203 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3317 Variable *T = makeReg(Ty); | 3204 Variable *T = makeReg(Ty); |
| 3318 _movp(T, SourceVectRM); | 3205 _movp(T, SourceVectRM); |
| 3319 if (Ty == IceType_v4f32) | 3206 if (Ty == IceType_v4f32) |
| 3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3207 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 3321 else | 3208 else |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3400 OperandX8632Mem *Loc = | 3287 OperandX8632Mem *Loc = |
| 3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3288 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | 3289 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); |
| 3403 | 3290 |
| 3404 Variable *T = makeReg(Ty); | 3291 Variable *T = makeReg(Ty); |
| 3405 _movp(T, Slot); | 3292 _movp(T, Slot); |
| 3406 _movp(Inst->getDest(), T); | 3293 _movp(Inst->getDest(), T); |
| 3407 } | 3294 } |
| 3408 } | 3295 } |
| 3409 | 3296 |
| 3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 3297 template <class Machine> |
| 3298 void TargetX86Base<Machine>::lowerIntrinsicCall( | |
| 3299 const InstIntrinsicCall *Instr) { | |
| 3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3300 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
| 3412 case Intrinsics::AtomicCmpxchg: { | 3301 case Intrinsics::AtomicCmpxchg: { |
| 3413 if (!Intrinsics::isMemoryOrderValid( | 3302 if (!Intrinsics::isMemoryOrderValid( |
| 3414 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3303 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 3415 getConstantMemoryOrder(Instr->getArg(4)))) { | 3304 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3305 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 3417 return; | 3306 return; |
| 3418 } | 3307 } |
| 3419 Variable *DestPrev = Instr->getDest(); | 3308 Variable *DestPrev = Instr->getDest(); |
| 3420 Operand *PtrToMem = Instr->getArg(0); | 3309 Operand *PtrToMem = Instr->getArg(0); |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3503 Context.insert( | 3392 Context.insert( |
| 3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3393 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 3505 return; | 3394 return; |
| 3506 } | 3395 } |
| 3507 case Intrinsics::AtomicRMW: | 3396 case Intrinsics::AtomicRMW: |
| 3508 if (!Intrinsics::isMemoryOrderValid( | 3397 if (!Intrinsics::isMemoryOrderValid( |
| 3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3398 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3510 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3399 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3511 return; | 3400 return; |
| 3512 } | 3401 } |
| 3513 lowerAtomicRMW( | 3402 lowerAtomicRMW(Instr->getDest(), |
| 3514 Instr->getDest(), | 3403 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( |
| 3515 static_cast<uint32_t>( | 3404 Instr->getArg(0))->getValue()), |
| 3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | 3405 Instr->getArg(1), Instr->getArg(2)); |
| 3517 Instr->getArg(1), Instr->getArg(2)); | |
| 3518 return; | 3406 return; |
| 3519 case Intrinsics::AtomicStore: { | 3407 case Intrinsics::AtomicStore: { |
| 3520 if (!Intrinsics::isMemoryOrderValid( | 3408 if (!Intrinsics::isMemoryOrderValid( |
| 3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3409 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 3522 Func->setError("Unexpected memory ordering for AtomicStore"); | 3410 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 3523 return; | 3411 return; |
| 3524 } | 3412 } |
| 3525 // We require the memory address to be naturally aligned. | 3413 // We require the memory address to be naturally aligned. |
| 3526 // Given that is the case, then normal stores are atomic. | 3414 // Given that is the case, then normal stores are atomic. |
| 3527 // Add a fence after the store to make it visible. | 3415 // Add a fence after the store to make it visible. |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3733 case Intrinsics::Trap: | 3621 case Intrinsics::Trap: |
| 3734 _ud2(); | 3622 _ud2(); |
| 3735 return; | 3623 return; |
| 3736 case Intrinsics::UnknownIntrinsic: | 3624 case Intrinsics::UnknownIntrinsic: |
| 3737 Func->setError("Should not be lowering UnknownIntrinsic"); | 3625 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3738 return; | 3626 return; |
| 3739 } | 3627 } |
| 3740 return; | 3628 return; |
| 3741 } | 3629 } |
| 3742 | 3630 |
| 3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, | 3631 template <class Machine> |
| 3744 Operand *Expected, Operand *Desired) { | 3632 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3633 Operand *Ptr, Operand *Expected, | |
| 3634 Operand *Desired) { | |
| 3745 if (Expected->getType() == IceType_i64) { | 3635 if (Expected->getType() == IceType_i64) { |
| 3746 // Reserve the pre-colored registers first, before adding any more | 3636 // Reserve the pre-colored registers first, before adding any more |
| 3747 // infinite-weight variables from formMemoryOperand's legalization. | 3637 // infinite-weight variables from formMemoryOperand's legalization. |
| 3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 3638 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
| 3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | 3639 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); |
| 3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 3640 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| 3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | 3641 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); |
| 3752 _mov(T_eax, loOperand(Expected)); | 3642 _mov(T_eax, loOperand(Expected)); |
| 3753 _mov(T_edx, hiOperand(Expected)); | 3643 _mov(T_edx, hiOperand(Expected)); |
| 3754 _mov(T_ebx, loOperand(Desired)); | 3644 _mov(T_ebx, loOperand(Desired)); |
| 3755 _mov(T_ecx, hiOperand(Desired)); | 3645 _mov(T_ecx, hiOperand(Desired)); |
| 3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3646 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3757 const bool Locked = true; | 3647 const bool Locked = true; |
| 3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3648 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3649 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3650 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3761 _mov(DestLo, T_eax); | 3651 _mov(DestLo, T_eax); |
| 3762 _mov(DestHi, T_edx); | 3652 _mov(DestHi, T_edx); |
| 3763 return; | 3653 return; |
| 3764 } | 3654 } |
| 3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); | 3655 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); |
| 3766 _mov(T_eax, Expected); | 3656 _mov(T_eax, Expected); |
| 3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3657 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3768 Variable *DesiredReg = legalizeToVar(Desired); | 3658 Variable *DesiredReg = legalizeToVar(Desired); |
| 3769 const bool Locked = true; | 3659 const bool Locked = true; |
| 3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3660 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3771 _mov(DestPrev, T_eax); | 3661 _mov(DestPrev, T_eax); |
| 3772 } | 3662 } |
| 3773 | 3663 |
| 3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | 3664 template <class Machine> |
| 3775 Operand *Expected, | 3665 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| 3776 Operand *Desired) { | 3666 Operand *PtrToMem, |
| 3667 Operand *Expected, | |
| 3668 Operand *Desired) { | |
| 3777 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3669 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 3778 return false; | 3670 return false; |
| 3779 // Peek ahead a few instructions and see how Dest is used. | 3671 // Peek ahead a few instructions and see how Dest is used. |
| 3780 // It's very common to have: | 3672 // It's very common to have: |
| 3781 // | 3673 // |
| 3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3674 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
| 3783 // [%y_phi = ...] // list of phi stores | 3675 // [%y_phi = ...] // list of phi stores |
| 3784 // %p = icmp eq i32 %x, %expected | 3676 // %p = icmp eq i32 %x, %expected |
| 3785 // br i1 %p, label %l1, label %l2 | 3677 // br i1 %p, label %l1, label %l2 |
| 3786 // | 3678 // |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3837 NextBr->setDeleted(); | 3729 NextBr->setDeleted(); |
| 3838 Context.advanceNext(); | 3730 Context.advanceNext(); |
| 3839 Context.advanceNext(); | 3731 Context.advanceNext(); |
| 3840 return true; | 3732 return true; |
| 3841 } | 3733 } |
| 3842 } | 3734 } |
| 3843 } | 3735 } |
| 3844 return false; | 3736 return false; |
| 3845 } | 3737 } |
| 3846 | 3738 |
| 3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3739 template <class Machine> |
| 3848 Operand *Ptr, Operand *Val) { | 3740 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 3741 Operand *Ptr, Operand *Val) { | |
| 3849 bool NeedsCmpxchg = false; | 3742 bool NeedsCmpxchg = false; |
| 3850 LowerBinOp Op_Lo = nullptr; | 3743 LowerBinOp Op_Lo = nullptr; |
| 3851 LowerBinOp Op_Hi = nullptr; | 3744 LowerBinOp Op_Hi = nullptr; |
| 3852 switch (Operation) { | 3745 switch (Operation) { |
| 3853 default: | 3746 default: |
| 3854 Func->setError("Unknown AtomicRMW operation"); | 3747 Func->setError("Unknown AtomicRMW operation"); |
| 3855 return; | 3748 return; |
| 3856 case Intrinsics::AtomicAdd: { | 3749 case Intrinsics::AtomicAdd: { |
| 3857 if (Dest->getType() == IceType_i64) { | 3750 if (Dest->getType() == IceType_i64) { |
| 3858 // All the fall-through paths must set this to true, but use this | 3751 // All the fall-through paths must set this to true, but use this |
| 3859 // for asserting. | 3752 // for asserting. |
| 3860 NeedsCmpxchg = true; | 3753 NeedsCmpxchg = true; |
| 3861 Op_Lo = &TargetX8632::_add; | 3754 Op_Lo = &TargetX86Base<Machine>::_add; |
| 3862 Op_Hi = &TargetX8632::_adc; | 3755 Op_Hi = &TargetX86Base<Machine>::_adc; |
| 3863 break; | 3756 break; |
| 3864 } | 3757 } |
| 3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3758 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3866 const bool Locked = true; | 3759 const bool Locked = true; |
| 3867 Variable *T = nullptr; | 3760 Variable *T = nullptr; |
| 3868 _mov(T, Val); | 3761 _mov(T, Val); |
| 3869 _xadd(Addr, T, Locked); | 3762 _xadd(Addr, T, Locked); |
| 3870 _mov(Dest, T); | 3763 _mov(Dest, T); |
| 3871 return; | 3764 return; |
| 3872 } | 3765 } |
| 3873 case Intrinsics::AtomicSub: { | 3766 case Intrinsics::AtomicSub: { |
| 3874 if (Dest->getType() == IceType_i64) { | 3767 if (Dest->getType() == IceType_i64) { |
| 3875 NeedsCmpxchg = true; | 3768 NeedsCmpxchg = true; |
| 3876 Op_Lo = &TargetX8632::_sub; | 3769 Op_Lo = &TargetX86Base<Machine>::_sub; |
| 3877 Op_Hi = &TargetX8632::_sbb; | 3770 Op_Hi = &TargetX86Base<Machine>::_sbb; |
| 3878 break; | 3771 break; |
| 3879 } | 3772 } |
| 3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3773 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3881 const bool Locked = true; | 3774 const bool Locked = true; |
| 3882 Variable *T = nullptr; | 3775 Variable *T = nullptr; |
| 3883 _mov(T, Val); | 3776 _mov(T, Val); |
| 3884 _neg(T); | 3777 _neg(T); |
| 3885 _xadd(Addr, T, Locked); | 3778 _xadd(Addr, T, Locked); |
| 3886 _mov(Dest, T); | 3779 _mov(Dest, T); |
| 3887 return; | 3780 return; |
| 3888 } | 3781 } |
| 3889 case Intrinsics::AtomicOr: | 3782 case Intrinsics::AtomicOr: |
| 3890 // TODO(jvoung): If Dest is null or dead, then some of these | 3783 // TODO(jvoung): If Dest is null or dead, then some of these |
| 3891 // operations do not need an "exchange", but just a locked op. | 3784 // operations do not need an "exchange", but just a locked op. |
| 3892 // That appears to be "worth" it for sub, or, and, and xor. | 3785 // That appears to be "worth" it for sub, or, and, and xor. |
| 3893 // xadd is probably fine vs lock add for add, and xchg is fine | 3786 // xadd is probably fine vs lock add for add, and xchg is fine |
| 3894 // vs an atomic store. | 3787 // vs an atomic store. |
| 3895 NeedsCmpxchg = true; | 3788 NeedsCmpxchg = true; |
| 3896 Op_Lo = &TargetX8632::_or; | 3789 Op_Lo = &TargetX86Base<Machine>::_or; |
| 3897 Op_Hi = &TargetX8632::_or; | 3790 Op_Hi = &TargetX86Base<Machine>::_or; |
| 3898 break; | 3791 break; |
| 3899 case Intrinsics::AtomicAnd: | 3792 case Intrinsics::AtomicAnd: |
| 3900 NeedsCmpxchg = true; | 3793 NeedsCmpxchg = true; |
| 3901 Op_Lo = &TargetX8632::_and; | 3794 Op_Lo = &TargetX86Base<Machine>::_and; |
| 3902 Op_Hi = &TargetX8632::_and; | 3795 Op_Hi = &TargetX86Base<Machine>::_and; |
| 3903 break; | 3796 break; |
| 3904 case Intrinsics::AtomicXor: | 3797 case Intrinsics::AtomicXor: |
| 3905 NeedsCmpxchg = true; | 3798 NeedsCmpxchg = true; |
| 3906 Op_Lo = &TargetX8632::_xor; | 3799 Op_Lo = &TargetX86Base<Machine>::_xor; |
| 3907 Op_Hi = &TargetX8632::_xor; | 3800 Op_Hi = &TargetX86Base<Machine>::_xor; |
| 3908 break; | 3801 break; |
| 3909 case Intrinsics::AtomicExchange: | 3802 case Intrinsics::AtomicExchange: |
| 3910 if (Dest->getType() == IceType_i64) { | 3803 if (Dest->getType() == IceType_i64) { |
| 3911 NeedsCmpxchg = true; | 3804 NeedsCmpxchg = true; |
| 3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3805 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 3913 // just need to be moved to the ecx and ebx registers. | 3806 // just need to be moved to the ecx and ebx registers. |
| 3914 Op_Lo = nullptr; | 3807 Op_Lo = nullptr; |
| 3915 Op_Hi = nullptr; | 3808 Op_Hi = nullptr; |
| 3916 break; | 3809 break; |
| 3917 } | 3810 } |
| 3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3811 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3919 Variable *T = nullptr; | 3812 Variable *T = nullptr; |
| 3920 _mov(T, Val); | 3813 _mov(T, Val); |
| 3921 _xchg(Addr, T); | 3814 _xchg(Addr, T); |
| 3922 _mov(Dest, T); | 3815 _mov(Dest, T); |
| 3923 return; | 3816 return; |
| 3924 } | 3817 } |
| 3925 // Otherwise, we need a cmpxchg loop. | 3818 // Otherwise, we need a cmpxchg loop. |
| 3926 (void)NeedsCmpxchg; | 3819 (void)NeedsCmpxchg; |
| 3927 assert(NeedsCmpxchg); | 3820 assert(NeedsCmpxchg); |
| 3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | 3821 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
| 3929 } | 3822 } |
| 3930 | 3823 |
| 3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, | 3824 template <class Machine> |
| 3932 Variable *Dest, Operand *Ptr, | 3825 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, |
| 3933 Operand *Val) { | 3826 LowerBinOp Op_Hi, |
| 3827 Variable *Dest, | |
| 3828 Operand *Ptr, | |
| 3829 Operand *Val) { | |
| 3934 // Expand a more complex RMW operation as a cmpxchg loop: | 3830 // Expand a more complex RMW operation as a cmpxchg loop: |
| 3935 // For 64-bit: | 3831 // For 64-bit: |
| 3936 // mov eax, [ptr] | 3832 // mov eax, [ptr] |
| 3937 // mov edx, [ptr + 4] | 3833 // mov edx, [ptr + 4] |
| 3938 // .LABEL: | 3834 // .LABEL: |
| 3939 // mov ebx, eax | 3835 // mov ebx, eax |
| 3940 // <Op_Lo> ebx, <desired_adj_lo> | 3836 // <Op_Lo> ebx, <desired_adj_lo> |
| 3941 // mov ecx, edx | 3837 // mov ecx, edx |
| 3942 // <Op_Hi> ecx, <desired_adj_hi> | 3838 // <Op_Hi> ecx, <desired_adj_hi> |
| 3943 // lock cmpxchg8b [ptr] | 3839 // lock cmpxchg8b [ptr] |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4028 // The address base (if any) is also reused in the loop. | 3924 // The address base (if any) is also reused in the loop. |
| 4029 if (Variable *Base = Addr->getBase()) | 3925 if (Variable *Base = Addr->getBase()) |
| 4030 Context.insert(InstFakeUse::create(Func, Base)); | 3926 Context.insert(InstFakeUse::create(Func, Base)); |
| 4031 _mov(Dest, T_eax); | 3927 _mov(Dest, T_eax); |
| 4032 } | 3928 } |
| 4033 | 3929 |
| 4034 // Lowers count {trailing, leading} zeros intrinsic. | 3930 // Lowers count {trailing, leading} zeros intrinsic. |
| 4035 // | 3931 // |
| 4036 // We could do constant folding here, but that should have | 3932 // We could do constant folding here, but that should have |
| 4037 // been done by the front-end/middle-end optimizations. | 3933 // been done by the front-end/middle-end optimizations. |
| 4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3934 template <class Machine> |
| 4039 Operand *FirstVal, Operand *SecondVal) { | 3935 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3936 Operand *FirstVal, | |
| 3937 Operand *SecondVal) { | |
| 4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3938 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 4041 // Then the instructions will handle the Val == 0 case much more simply | 3939 // Then the instructions will handle the Val == 0 case much more simply |
| 4042 // and won't require conversion from bit position to number of zeros. | 3940 // and won't require conversion from bit position to number of zeros. |
| 4043 // | 3941 // |
| 4044 // Otherwise: | 3942 // Otherwise: |
| 4045 // bsr IF_NOT_ZERO, Val | 3943 // bsr IF_NOT_ZERO, Val |
| 4046 // mov T_DEST, 63 | 3944 // mov T_DEST, 63 |
| 4047 // cmovne T_DEST, IF_NOT_ZERO | 3945 // cmovne T_DEST, IF_NOT_ZERO |
| 4048 // xor T_DEST, 31 | 3946 // xor T_DEST, 31 |
| 4049 // mov DEST, T_DEST | 3947 // mov DEST, T_DEST |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4100 } else { | 3998 } else { |
| 4101 _bsr(T_Dest2, SecondVar); | 3999 _bsr(T_Dest2, SecondVar); |
| 4102 _xor(T_Dest2, ThirtyOne); | 4000 _xor(T_Dest2, ThirtyOne); |
| 4103 } | 4001 } |
| 4104 _test(SecondVar, SecondVar); | 4002 _test(SecondVar, SecondVar); |
| 4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e); | 4003 _cmov(T_Dest2, T_Dest, CondX86::Br_e); |
| 4106 _mov(DestLo, T_Dest2); | 4004 _mov(DestLo, T_Dest2); |
| 4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 4005 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 4108 } | 4006 } |
| 4109 | 4007 |
| 4110 namespace { | |
| 4111 | |
| 4112 bool isAdd(const Inst *Inst) { | 4008 bool isAdd(const Inst *Inst) { |
| 4113 if (const InstArithmetic *Arith = | 4009 if (const InstArithmetic *Arith = |
| 4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 4010 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 4115 return (Arith->getOp() == InstArithmetic::Add); | 4011 return (Arith->getOp() == InstArithmetic::Add); |
| 4116 } | 4012 } |
| 4117 return false; | 4013 return false; |
| 4118 } | 4014 } |
| 4119 | 4015 |
| 4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 4016 void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 4121 const Variable *Index, uint16_t Shift, int32_t Offset, | 4017 const Variable *Index, uint16_t Shift, int32_t Offset, |
| (...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4342 // set Index=Var, Offset+=(Const<<Shift) | 4238 // set Index=Var, Offset+=(Const<<Shift) |
| 4343 | 4239 |
| 4344 // Index is Index=Var-Const ==> | 4240 // Index is Index=Var-Const ==> |
| 4345 // set Index=Var, Offset-=(Const<<Shift) | 4241 // set Index=Var, Offset-=(Const<<Shift) |
| 4346 | 4242 |
| 4347 // TODO: consider overflow issues with respect to Offset. | 4243 // TODO: consider overflow issues with respect to Offset. |
| 4348 // TODO: handle symbolic constants. | 4244 // TODO: handle symbolic constants. |
| 4349 } | 4245 } |
| 4350 } | 4246 } |
| 4351 | 4247 |
| 4352 } // anonymous namespace | 4248 template <class Machine> |
| 4353 | 4249 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { |
| 4354 void TargetX8632::lowerLoad(const InstLoad *Load) { | |
| 4355 // A Load instruction can be treated the same as an Assign | 4250 // A Load instruction can be treated the same as an Assign |
| 4356 // instruction, after the source operand is transformed into an | 4251 // instruction, after the source operand is transformed into an |
| 4357 // OperandX8632Mem operand. Note that the address mode | 4252 // OperandX8632Mem operand. Note that the address mode |
| 4358 // optimization already creates an OperandX8632Mem operand, so it | 4253 // optimization already creates an OperandX8632Mem operand, so it |
| 4359 // doesn't need another level of transformation. | 4254 // doesn't need another level of transformation. |
| 4360 Variable *DestLoad = Load->getDest(); | 4255 Variable *DestLoad = Load->getDest(); |
| 4361 Type Ty = DestLoad->getType(); | 4256 Type Ty = DestLoad->getType(); |
| 4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 4257 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 4258 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 4364 lowerAssign(Assign); | 4259 lowerAssign(Assign); |
| 4365 } | 4260 } |
| 4366 | 4261 |
| 4367 void TargetX8632::doAddressOptLoad() { | 4262 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| 4368 Inst *Inst = Context.getCur(); | 4263 Inst *Inst = Context.getCur(); |
| 4369 Variable *Dest = Inst->getDest(); | 4264 Variable *Dest = Inst->getDest(); |
| 4370 Operand *Addr = Inst->getSrc(0); | 4265 Operand *Addr = Inst->getSrc(0); |
| 4371 Variable *Index = nullptr; | 4266 Variable *Index = nullptr; |
| 4372 uint16_t Shift = 0; | 4267 uint16_t Shift = 0; |
| 4373 int32_t Offset = 0; // TODO: make Constant | 4268 int32_t Offset = 0; // TODO: make Constant |
| 4374 // Vanilla ICE load instructions should not use the segment registers, | 4269 // Vanilla ICE load instructions should not use the segment registers, |
| 4375 // and computeAddressOpt only works at the level of Variables and Constants, | 4270 // and computeAddressOpt only works at the level of Variables and Constants, |
| 4376 // not other OperandX8632Mem, so there should be no mention of segment | 4271 // not other OperandX8632Mem, so there should be no mention of segment |
| 4377 // registers there either. | 4272 // registers there either. |
| 4378 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4273 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4379 OperandX8632Mem::DefaultSegment; | 4274 OperandX8632Mem::DefaultSegment; |
| 4380 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4275 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4276 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4382 if (Base && Addr != Base) { | 4277 if (Base && Addr != Base) { |
| 4383 Inst->setDeleted(); | 4278 Inst->setDeleted(); |
| 4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4279 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, | 4280 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
| 4386 Shift, SegmentReg); | 4281 Shift, SegmentReg); |
| 4387 Context.insert(InstLoad::create(Func, Dest, Addr)); | 4282 Context.insert(InstLoad::create(Func, Dest, Addr)); |
| 4388 } | 4283 } |
| 4389 } | 4284 } |
| 4390 | 4285 |
| 4391 void TargetX8632::randomlyInsertNop(float Probability) { | 4286 template <class Machine> |
| 4287 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) { | |
| 4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | 4288 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); |
| 4393 if (RNG.getTrueWithProbability(Probability)) { | 4289 if (RNG.getTrueWithProbability(Probability)) { |
| 4394 _nop(RNG(X86_NUM_NOP_VARIANTS)); | 4290 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); |
| 4395 } | 4291 } |
| 4396 } | 4292 } |
| 4397 | 4293 |
| 4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | 4294 template <class Machine> |
| 4295 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | |
| 4399 Func->setError("Phi found in regular instruction list"); | 4296 Func->setError("Phi found in regular instruction list"); |
| 4400 } | 4297 } |
| 4401 | 4298 |
| 4402 void TargetX8632::lowerRet(const InstRet *Inst) { | 4299 template <class Machine> |
| 4300 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) { | |
| 4403 Variable *Reg = nullptr; | 4301 Variable *Reg = nullptr; |
| 4404 if (Inst->hasRetValue()) { | 4302 if (Inst->hasRetValue()) { |
| 4405 Operand *Src0 = legalize(Inst->getRetValue()); | 4303 Operand *Src0 = legalize(Inst->getRetValue()); |
| 4406 if (Src0->getType() == IceType_i64) { | 4304 if (Src0->getType() == IceType_i64) { |
| 4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); | 4305 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); |
| 4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); | 4306 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); |
| 4409 Reg = eax; | 4307 Reg = eax; |
| 4410 Context.insert(InstFakeUse::create(Func, edx)); | 4308 Context.insert(InstFakeUse::create(Func, edx)); |
| 4411 } else if (isScalarFloatingType(Src0->getType())) { | 4309 } else if (isScalarFloatingType(Src0->getType())) { |
| 4412 _fld(Src0); | 4310 _fld(Src0); |
| 4413 } else if (isVectorType(Src0->getType())) { | 4311 } else if (isVectorType(Src0->getType())) { |
| 4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); | 4312 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); |
| 4415 } else { | 4313 } else { |
| 4416 _mov(Reg, Src0, RegX8632::Reg_eax); | 4314 _mov(Reg, Src0, RegX8632::Reg_eax); |
| 4417 } | 4315 } |
| 4418 } | 4316 } |
| 4419 // Add a ret instruction even if sandboxing is enabled, because | 4317 // Add a ret instruction even if sandboxing is enabled, because |
| 4420 // addEpilog explicitly looks for a ret instruction as a marker for | 4318 // addEpilog explicitly looks for a ret instruction as a marker for |
| 4421 // where to insert the frame removal instructions. | 4319 // where to insert the frame removal instructions. |
| 4422 _ret(Reg); | 4320 _ret(Reg); |
| 4423 // Add a fake use of esp to make sure esp stays alive for the entire | 4321 // Add a fake use of esp to make sure esp stays alive for the entire |
| 4424 // function. Otherwise post-call esp adjustments get dead-code | 4322 // function. Otherwise post-call esp adjustments get dead-code |
| 4425 // eliminated. TODO: Are there more places where the fake use | 4323 // eliminated. TODO: Are there more places where the fake use |
| 4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 4324 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| 4427 // have a ret instruction. | 4325 // have a ret instruction. |
| 4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 4326 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
| 4429 Context.insert(InstFakeUse::create(Func, esp)); | 4327 Context.insert(InstFakeUse::create(Func, esp)); |
| 4430 } | 4328 } |
| 4431 | 4329 |
| 4432 void TargetX8632::lowerSelect(const InstSelect *Inst) { | 4330 template <class Machine> |
| 4331 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { | |
| 4433 Variable *Dest = Inst->getDest(); | 4332 Variable *Dest = Inst->getDest(); |
| 4434 Type DestTy = Dest->getType(); | 4333 Type DestTy = Dest->getType(); |
| 4435 Operand *SrcT = Inst->getTrueOperand(); | 4334 Operand *SrcT = Inst->getTrueOperand(); |
| 4436 Operand *SrcF = Inst->getFalseOperand(); | 4335 Operand *SrcF = Inst->getFalseOperand(); |
| 4437 Operand *Condition = Inst->getCondition(); | 4336 Operand *Condition = Inst->getCondition(); |
| 4438 | 4337 |
| 4439 if (isVectorType(DestTy)) { | 4338 if (isVectorType(DestTy)) { |
| 4440 Type SrcTy = SrcT->getType(); | 4339 Type SrcTy = SrcT->getType(); |
| 4441 Variable *T = makeReg(SrcTy); | 4340 Variable *T = makeReg(SrcTy); |
| 4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 4341 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 4342 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 4444 if (InstructionSet >= SSE4_1) { | 4343 if (InstructionSet >= Machine::SSE4_1) { |
| 4445 // TODO(wala): If the condition operand is a constant, use blendps | 4344 // TODO(wala): If the condition operand is a constant, use blendps |
| 4446 // or pblendw. | 4345 // or pblendw. |
| 4447 // | 4346 // |
| 4448 // Use blendvps or pblendvb to implement select. | 4347 // Use blendvps or pblendvb to implement select. |
| 4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 4348 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 4450 SrcTy == IceType_v4f32) { | 4349 SrcTy == IceType_v4f32) { |
| 4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4350 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 4351 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
| 4453 _movp(xmm0, ConditionRM); | 4352 _movp(xmm0, ConditionRM); |
| 4454 _psll(xmm0, Ctx->getConstantInt8(31)); | 4353 _psll(xmm0, Ctx->getConstantInt8(31)); |
| 4455 _movp(T, SrcFRM); | 4354 _movp(T, SrcFRM); |
| 4456 _blendvps(T, SrcTRM, xmm0); | 4355 _blendvps(T, SrcTRM, xmm0); |
| 4457 _movp(Dest, T); | 4356 _movp(Dest, T); |
| 4458 } else { | 4357 } else { |
| 4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 4358 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 4359 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| 4461 : IceType_v16i8; | 4360 : IceType_v16i8; |
| 4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 4361 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
| 4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 4362 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 4464 _movp(T, SrcFRM); | 4363 _movp(T, SrcFRM); |
| 4465 _pblendvb(T, SrcTRM, xmm0); | 4364 _pblendvb(T, SrcTRM, xmm0); |
| 4466 _movp(Dest, T); | 4365 _movp(Dest, T); |
| 4467 } | 4366 } |
| 4468 return; | 4367 return; |
| 4469 } | 4368 } |
 | 4470 // Lower select without SSE4.1: | 4369 // Lower select without SSE4.1: |
| 4471 // a=d?b:c ==> | 4370 // a=d?b:c ==> |
| 4472 // if elementtype(d) != i1: | 4371 // if elementtype(d) != i1: |
| 4473 // d=sext(d); | 4372 // d=sext(d); |
| 4474 // a=(b&d)|(c&~d); | 4373 // a=(b&d)|(c&~d); |
| 4475 Variable *T2 = makeReg(SrcTy); | 4374 Variable *T2 = makeReg(SrcTy); |
| 4476 // Sign extend the condition operand if applicable. | 4375 // Sign extend the condition operand if applicable. |
| 4477 if (SrcTy == IceType_v4f32) { | 4376 if (SrcTy == IceType_v4f32) { |
| 4478 // The sext operation takes only integer arguments. | 4377 // The sext operation takes only integer arguments. |
| 4479 Variable *T3 = Func->makeVariable(IceType_v4i32); | 4378 Variable *T3 = Func->makeVariable(IceType_v4i32); |
| 4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | 4379 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 4498 Operand *CmpOpnd0 = nullptr; | 4397 Operand *CmpOpnd0 = nullptr; |
| 4499 Operand *CmpOpnd1 = nullptr; | 4398 Operand *CmpOpnd1 = nullptr; |
| 4500 // Handle folding opportunities. | 4399 // Handle folding opportunities. |
| 4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4400 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| 4502 assert(Producer->isDeleted()); | 4401 assert(Producer->isDeleted()); |
| 4503 switch (BoolFolding::getProducerKind(Producer)) { | 4402 switch (BoolFolding::getProducerKind(Producer)) { |
| 4504 default: | 4403 default: |
| 4505 break; | 4404 break; |
| 4506 case BoolFolding::PK_Icmp32: { | 4405 case BoolFolding::PK_Icmp32: { |
| 4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 4406 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| 4508 Cond = getIcmp32Mapping(Cmp->getCondition()); | 4407 Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); |
| 4509 CmpOpnd1 = legalize(Producer->getSrc(1)); | 4408 CmpOpnd1 = legalize(Producer->getSrc(1)); |
| 4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); | 4409 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); |
| 4511 } break; | 4410 } break; |
| 4512 } | 4411 } |
| 4513 } | 4412 } |
| 4514 if (CmpOpnd0 == nullptr) { | 4413 if (CmpOpnd0 == nullptr) { |
| 4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | 4414 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 4415 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
| 4517 } | 4416 } |
| 4518 assert(CmpOpnd0); | 4417 assert(CmpOpnd0); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4562 | 4461 |
| 4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32); | 4462 assert(DestTy == IceType_i16 || DestTy == IceType_i32); |
| 4564 Variable *T = nullptr; | 4463 Variable *T = nullptr; |
| 4565 SrcF = legalize(SrcF); | 4464 SrcF = legalize(SrcF); |
| 4566 _mov(T, SrcF); | 4465 _mov(T, SrcF); |
| 4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4466 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4568 _cmov(T, SrcT, Cond); | 4467 _cmov(T, SrcT, Cond); |
| 4569 _mov(Dest, T); | 4468 _mov(Dest, T); |
| 4570 } | 4469 } |
| 4571 | 4470 |
| 4572 void TargetX8632::lowerStore(const InstStore *Inst) { | 4471 template <class Machine> |
| 4472 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | |
| 4573 Operand *Value = Inst->getData(); | 4473 Operand *Value = Inst->getData(); |
| 4574 Operand *Addr = Inst->getAddr(); | 4474 Operand *Addr = Inst->getAddr(); |
| 4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 4475 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
| 4576 Type Ty = NewAddr->getType(); | 4476 Type Ty = NewAddr->getType(); |
| 4577 | 4477 |
| 4578 if (Ty == IceType_i64) { | 4478 if (Ty == IceType_i64) { |
| 4579 Value = legalize(Value); | 4479 Value = legalize(Value); |
| 4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4480 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
| 4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4481 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
| 4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 4482 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
| 4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 4483 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
| 4584 } else if (isVectorType(Ty)) { | 4484 } else if (isVectorType(Ty)) { |
| 4585 _storep(legalizeToVar(Value), NewAddr); | 4485 _storep(legalizeToVar(Value), NewAddr); |
| 4586 } else { | 4486 } else { |
| 4587 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4487 Value = legalize(Value, Legal_Reg | Legal_Imm); |
| 4588 _store(Value, NewAddr); | 4488 _store(Value, NewAddr); |
| 4589 } | 4489 } |
| 4590 } | 4490 } |
| 4591 | 4491 |
| 4592 void TargetX8632::doAddressOptStore() { | 4492 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| 4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | 4493 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); |
| 4594 Operand *Data = Inst->getData(); | 4494 Operand *Data = Inst->getData(); |
| 4595 Operand *Addr = Inst->getAddr(); | 4495 Operand *Addr = Inst->getAddr(); |
| 4596 Variable *Index = nullptr; | 4496 Variable *Index = nullptr; |
| 4597 uint16_t Shift = 0; | 4497 uint16_t Shift = 0; |
| 4598 int32_t Offset = 0; // TODO: make Constant | 4498 int32_t Offset = 0; // TODO: make Constant |
| 4599 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4499 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4600 // Vanilla ICE store instructions should not use the segment registers, | 4500 // Vanilla ICE store instructions should not use the segment registers, |
| 4601 // and computeAddressOpt only works at the level of Variables and Constants, | 4501 // and computeAddressOpt only works at the level of Variables and Constants, |
| 4602 // not other OperandX8632Mem, so there should be no mention of segment | 4502 // not other OperandX8632Mem, so there should be no mention of segment |
| 4603 // registers there either. | 4503 // registers there either. |
| 4604 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4504 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4605 OperandX8632Mem::DefaultSegment; | 4505 OperandX8632Mem::DefaultSegment; |
| 4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4506 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4607 if (Base && Addr != Base) { | 4507 if (Base && Addr != Base) { |
| 4608 Inst->setDeleted(); | 4508 Inst->setDeleted(); |
| 4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4509 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4510 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 4611 Shift, SegmentReg); | 4511 Shift, SegmentReg); |
| 4612 InstStore *NewStore = InstStore::create(Func, Data, Addr); | 4512 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| 4613 if (Inst->getDest()) | 4513 if (Inst->getDest()) |
| 4614 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 4514 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 4615 Context.insert(NewStore); | 4515 Context.insert(NewStore); |
| 4616 } | 4516 } |
| 4617 } | 4517 } |
| 4618 | 4518 |
| 4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4519 template <class Machine> |
| 4520 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | |
| 4620 // This implements the most naive possible lowering. | 4521 // This implements the most naive possible lowering. |
| 4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4522 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
| 4622 Operand *Src0 = Inst->getComparison(); | 4523 Operand *Src0 = Inst->getComparison(); |
| 4623 SizeT NumCases = Inst->getNumCases(); | 4524 SizeT NumCases = Inst->getNumCases(); |
| 4624 if (Src0->getType() == IceType_i64) { | 4525 if (Src0->getType() == IceType_i64) { |
| 4625 Src0 = legalize(Src0); // get Base/Index into physical registers | 4526 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4626 Operand *Src0Lo = loOperand(Src0); | 4527 Operand *Src0Lo = loOperand(Src0); |
| 4627 Operand *Src0Hi = hiOperand(Src0); | 4528 Operand *Src0Hi = hiOperand(Src0); |
| 4628 if (NumCases >= 2) { | 4529 if (NumCases >= 2) { |
| 4629 Src0Lo = legalizeToVar(Src0Lo); | 4530 Src0Lo = legalizeToVar(Src0Lo); |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | 4554 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); |
| 4654 for (SizeT I = 0; I < NumCases; ++I) { | 4555 for (SizeT I = 0; I < NumCases; ++I) { |
| 4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | 4556 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); |
| 4656 _cmp(Src0, Value); | 4557 _cmp(Src0, Value); |
| 4657 _br(CondX86::Br_e, Inst->getLabel(I)); | 4558 _br(CondX86::Br_e, Inst->getLabel(I)); |
| 4658 } | 4559 } |
| 4659 | 4560 |
| 4660 _br(Inst->getLabelDefault()); | 4561 _br(Inst->getLabelDefault()); |
| 4661 } | 4562 } |
| 4662 | 4563 |
| 4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4564 template <class Machine> |
| 4664 Variable *Dest, Operand *Src0, | 4565 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4665 Operand *Src1) { | 4566 Variable *Dest, Operand *Src0, |
| 4567 Operand *Src1) { | |
| 4666 assert(isVectorType(Dest->getType())); | 4568 assert(isVectorType(Dest->getType())); |
| 4667 Type Ty = Dest->getType(); | 4569 Type Ty = Dest->getType(); |
| 4668 Type ElementTy = typeElementType(Ty); | 4570 Type ElementTy = typeElementType(Ty); |
| 4669 SizeT NumElements = typeNumElements(Ty); | 4571 SizeT NumElements = typeNumElements(Ty); |
| 4670 | 4572 |
| 4671 Operand *T = Ctx->getConstantUndef(Ty); | 4573 Operand *T = Ctx->getConstantUndef(Ty); |
| 4672 for (SizeT I = 0; I < NumElements; ++I) { | 4574 for (SizeT I = 0; I < NumElements; ++I) { |
| 4673 Constant *Index = Ctx->getConstantInt32(I); | 4575 Constant *Index = Ctx->getConstantInt32(I); |
| 4674 | 4576 |
| 4675 // Extract the next two inputs. | 4577 // Extract the next two inputs. |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 4692 } | 4594 } |
| 4693 | 4595 |
| 4694 // The following pattern occurs often in lowered C and C++ code: | 4596 // The following pattern occurs often in lowered C and C++ code: |
| 4695 // | 4597 // |
| 4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4598 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4599 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4698 // | 4600 // |
| 4699 // We can eliminate the sext operation by copying the result of pcmpeqd, | 4601 // We can eliminate the sext operation by copying the result of pcmpeqd, |
| 4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 4602 // pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 4701 // of the sext operation. | 4603 // of the sext operation. |
| 4702 void TargetX8632::eliminateNextVectorSextInstruction( | 4604 template <class Machine> |
| 4605 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | |
| 4703 Variable *SignExtendedResult) { | 4606 Variable *SignExtendedResult) { |
| 4704 if (InstCast *NextCast = | 4607 if (InstCast *NextCast = |
| 4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4608 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 4706 if (NextCast->getCastKind() == InstCast::Sext && | 4609 if (NextCast->getCastKind() == InstCast::Sext && |
| 4707 NextCast->getSrc(0) == SignExtendedResult) { | 4610 NextCast->getSrc(0) == SignExtendedResult) { |
| 4708 NextCast->setDeleted(); | 4611 NextCast->setDeleted(); |
| 4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4612 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4710 // Skip over the instruction. | 4613 // Skip over the instruction. |
| 4711 Context.advanceNext(); | 4614 Context.advanceNext(); |
| 4712 } | 4615 } |
| 4713 } | 4616 } |
| 4714 } | 4617 } |
| 4715 | 4618 |
| 4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | 4619 template <class Machine> |
| 4620 void TargetX86Base<Machine>::lowerUnreachable( | |
| 4621 const InstUnreachable * /*Inst*/) { | |
| 4622 _ud2(); | |
| 4623 } | |
| 4717 | 4624 |
| 4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { | 4625 template <class Machine> |
| 4626 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) { | |
| 4719 // If the beacon variable's live range does not end in this | 4627 // If the beacon variable's live range does not end in this |
| 4720 // instruction, then it must end in the modified Store instruction | 4628 // instruction, then it must end in the modified Store instruction |
| 4721 // that follows. This means that the original Store instruction is | 4629 // that follows. This means that the original Store instruction is |
| 4722 // still there, either because the value being stored is used beyond | 4630 // still there, either because the value being stored is used beyond |
| 4723 // the Store instruction, or because dead code elimination did not | 4631 // the Store instruction, or because dead code elimination did not |
| 4724 // happen. In either case, we cancel RMW lowering (and the caller | 4632 // happen. In either case, we cancel RMW lowering (and the caller |
| 4725 // deletes the RMW instruction). | 4633 // deletes the RMW instruction). |
| 4726 if (!RMW->isLastUse(RMW->getBeacon())) | 4634 if (!RMW->isLastUse(RMW->getBeacon())) |
| 4727 return; | 4635 return; |
| 4728 Operand *Src = RMW->getData(); | 4636 Operand *Src = RMW->getData(); |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4782 return; | 4690 return; |
| 4783 case InstArithmetic::Xor: | 4691 case InstArithmetic::Xor: |
| 4784 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4692 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 4785 _xor_rmw(Addr, Src); | 4693 _xor_rmw(Addr, Src); |
| 4786 return; | 4694 return; |
| 4787 } | 4695 } |
| 4788 } | 4696 } |
| 4789 llvm::report_fatal_error("Couldn't lower RMW instruction"); | 4697 llvm::report_fatal_error("Couldn't lower RMW instruction"); |
| 4790 } | 4698 } |
| 4791 | 4699 |
| 4792 void TargetX8632::lowerOther(const Inst *Instr) { | 4700 template <class Machine> |
| 4701 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | |
| 4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | 4702 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4794 lowerRMW(RMW); | 4703 lowerRMW(RMW); |
| 4795 } else { | 4704 } else { |
| 4796 TargetLowering::lowerOther(Instr); | 4705 TargetLowering::lowerOther(Instr); |
| 4797 } | 4706 } |
| 4798 } | 4707 } |
| 4799 | 4708 |
| 4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4709 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4801 // preserve integrity of liveness analysis. Undef values are also | 4710 // preserve integrity of liveness analysis. Undef values are also |
| 4802 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4711 // turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4803 // Undef input. | 4712 // Undef input. |
| 4804 void TargetX8632::prelowerPhis() { | 4713 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4805 // Pause constant blinding or pooling, blinding or pooling will be done later | 4714 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 4806 // during phi lowering assignments | 4715 // during phi lowering assignments |
| 4807 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4716 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4808 | 4717 |
| 4809 CfgNode *Node = Context.getNode(); | 4718 CfgNode *Node = Context.getNode(); |
| 4810 for (Inst &I : Node->getPhis()) { | 4719 for (Inst &I : Node->getPhis()) { |
| 4811 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4720 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4812 if (Phi->isDeleted()) | 4721 if (Phi->isDeleted()) |
| 4813 continue; | 4722 continue; |
| 4814 Variable *Dest = Phi->getDest(); | 4723 Variable *Dest = Phi->getDest(); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 4825 PhiLo->addArgument(loOperand(Src), Label); | 4734 PhiLo->addArgument(loOperand(Src), Label); |
| 4826 PhiHi->addArgument(hiOperand(Src), Label); | 4735 PhiHi->addArgument(hiOperand(Src), Label); |
| 4827 } | 4736 } |
| 4828 Node->getPhis().push_back(PhiLo); | 4737 Node->getPhis().push_back(PhiLo); |
| 4829 Node->getPhis().push_back(PhiHi); | 4738 Node->getPhis().push_back(PhiHi); |
| 4830 Phi->setDeleted(); | 4739 Phi->setDeleted(); |
| 4831 } | 4740 } |
| 4832 } | 4741 } |
| 4833 } | 4742 } |
| 4834 | 4743 |
| 4835 namespace { | |
| 4836 | |
| 4837 bool isMemoryOperand(const Operand *Opnd) { | 4744 bool isMemoryOperand(const Operand *Opnd) { |
| 4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) | 4745 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) |
| 4839 return !Var->hasReg(); | 4746 return !Var->hasReg(); |
| 4840 // We treat vector undef values the same as a memory operand, | 4747 // We treat vector undef values the same as a memory operand, |
| 4841 // because they do in fact need a register to materialize the vector | 4748 // because they do in fact need a register to materialize the vector |
| 4842 // of zeroes into. | 4749 // of zeroes into. |
| 4843 if (llvm::isa<ConstantUndef>(Opnd)) | 4750 if (llvm::isa<ConstantUndef>(Opnd)) |
| 4844 return isScalarFloatingType(Opnd->getType()) || | 4751 return isScalarFloatingType(Opnd->getType()) || |
| 4845 isVectorType(Opnd->getType()); | 4752 isVectorType(Opnd->getType()); |
| 4846 if (llvm::isa<Constant>(Opnd)) | 4753 if (llvm::isa<Constant>(Opnd)) |
| 4847 return isScalarFloatingType(Opnd->getType()); | 4754 return isScalarFloatingType(Opnd->getType()); |
| 4848 return true; | 4755 return true; |
| 4849 } | 4756 } |
| 4850 | 4757 |
| 4851 } // end of anonymous namespace | |
| 4852 | |
| 4853 // Lower the pre-ordered list of assignments into mov instructions. | 4758 // Lower the pre-ordered list of assignments into mov instructions. |
| 4854 // Also has to do some ad-hoc register allocation as necessary. | 4759 // Also has to do some ad-hoc register allocation as necessary. |
| 4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node, | 4760 template <class Machine> |
| 4856 const AssignList &Assignments) { | 4761 void TargetX86Base<Machine>::lowerPhiAssignments( |
| 4762 CfgNode *Node, const AssignList &Assignments) { | |
| 4857 // Check that this is a properly initialized shell of a node. | 4763 // Check that this is a properly initialized shell of a node. |
| 4858 assert(Node->getOutEdges().size() == 1); | 4764 assert(Node->getOutEdges().size() == 1); |
| 4859 assert(Node->getInsts().empty()); | 4765 assert(Node->getInsts().empty()); |
| 4860 assert(Node->getPhis().empty()); | 4766 assert(Node->getPhis().empty()); |
| 4861 CfgNode *Succ = Node->getOutEdges().front(); | 4767 CfgNode *Succ = Node->getOutEdges().front(); |
| 4862 getContext().init(Node); | 4768 getContext().init(Node); |
| 4863 // Register set setup similar to regAlloc(). | 4769 // Register set setup similar to regAlloc(). |
| 4864 RegSetMask RegInclude = RegSet_All; | 4770 RegSetMask RegInclude = RegSet_All; |
| 4865 RegSetMask RegExclude = RegSet_StackPointer; | 4771 RegSetMask RegExclude = RegSet_StackPointer; |
| 4866 if (hasFramePointer()) | 4772 if (hasFramePointer()) |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4997 _br(Succ); | 4903 _br(Succ); |
| 4998 } | 4904 } |
| 4999 | 4905 |
| 5000 // There is no support for loading or emitting vector constants, so the | 4906 // There is no support for loading or emitting vector constants, so the |
| 5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 4907 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| 5002 // etc. are initialized with register operations. | 4908 // etc. are initialized with register operations. |
| 5003 // | 4909 // |
| 5004 // TODO(wala): Add limited support for vector constants so that | 4910 // TODO(wala): Add limited support for vector constants so that |
| 5005 // complex initialization in registers is unnecessary. | 4911 // complex initialization in registers is unnecessary. |
| 5006 | 4912 |
| 5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 4913 template <class Machine> |
| 4914 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | |
| 5008 Variable *Reg = makeReg(Ty, RegNum); | 4915 Variable *Reg = makeReg(Ty, RegNum); |
| 5009 // Insert a FakeDef, since otherwise the live range of Reg might | 4916 // Insert a FakeDef, since otherwise the live range of Reg might |
| 5010 // be overestimated. | 4917 // be overestimated. |
| 5011 Context.insert(InstFakeDef::create(Func, Reg)); | 4918 Context.insert(InstFakeDef::create(Func, Reg)); |
| 5012 _pxor(Reg, Reg); | 4919 _pxor(Reg, Reg); |
| 5013 return Reg; | 4920 return Reg; |
| 5014 } | 4921 } |
| 5015 | 4922 |
| 5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | 4923 template <class Machine> |
| 4924 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | |
| 4925 int32_t RegNum) { | |
| 5017 Variable *MinusOnes = makeReg(Ty, RegNum); | 4926 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 4927 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5019 Context.insert(InstFakeDef::create(Func, MinusOnes)); | 4928 Context.insert(InstFakeDef::create(Func, MinusOnes)); |
| 5020 _pcmpeq(MinusOnes, MinusOnes); | 4929 _pcmpeq(MinusOnes, MinusOnes); |
| 5021 return MinusOnes; | 4930 return MinusOnes; |
| 5022 } | 4931 } |
| 5023 | 4932 |
| 5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 4933 template <class Machine> |
| 4934 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | |
| 5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 4935 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 4936 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5027 _psub(Dest, MinusOne); | 4937 _psub(Dest, MinusOne); |
| 5028 return Dest; | 4938 return Dest; |
| 5029 } | 4939 } |
| 5030 | 4940 |
| 5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | 4941 template <class Machine> |
| 4942 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, | |
| 4943 int32_t RegNum) { | |
| 5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 4944 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 5033 Ty == IceType_v16i8); | 4945 Ty == IceType_v16i8); |
| 5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 4946 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 4947 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | 4948 SizeT Shift = |
| 4949 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | |
| 5037 _psll(Reg, Ctx->getConstantInt8(Shift)); | 4950 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 5038 return Reg; | 4951 return Reg; |
| 5039 } else { | 4952 } else { |
| 5040 // SSE has no left shift operation for vectors of 8 bit integers. | 4953 // SSE has no left shift operation for vectors of 8 bit integers. |
| 5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4954 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4955 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 5043 Variable *Reg = makeReg(Ty, RegNum); | 4956 Variable *Reg = makeReg(Ty, RegNum); |
| 5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4957 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4958 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 5046 return Reg; | 4959 return Reg; |
| 5047 } | 4960 } |
| 5048 } | 4961 } |
| 5049 | 4962 |
| 5050 // Construct a mask in a register that can be and'ed with a | 4963 // Construct a mask in a register that can be and'ed with a |
| 5051 // floating-point value to mask off its sign bit. The value will be | 4964 // floating-point value to mask off its sign bit. The value will be |
| 5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4965 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| 5053 // for f64. Construct it as vector of ones logically right shifted | 4966 // for f64. Construct it as vector of ones logically right shifted |
| 5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4967 // one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| 5055 // vector constants in memory. | 4968 // vector constants in memory. |
| 5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { | 4969 template <class Machine> |
| 4970 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | |
| 4971 int32_t RegNum) { | |
| 5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4972 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 5058 _psrl(Reg, Ctx->getConstantInt8(1)); | 4973 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 5059 return Reg; | 4974 return Reg; |
| 5060 } | 4975 } |
| 5061 | 4976 |
| 5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 4977 template <class Machine> |
| 5063 Variable *Slot, | 4978 OperandX8632Mem * |
| 5064 uint32_t Offset) { | 4979 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 4980 uint32_t Offset) { | |
| 5065 // Ensure that Loc is a stack slot. | 4981 // Ensure that Loc is a stack slot. |
| 5066 assert(Slot->getWeight().isZero()); | 4982 assert(Slot->getWeight().isZero()); |
| 5067 assert(Slot->getRegNum() == Variable::NoRegister); | 4983 assert(Slot->getRegNum() == Variable::NoRegister); |
| 5068 // Compute the location of Loc in memory. | 4984 // Compute the location of Loc in memory. |
| 5069 // TODO(wala,stichnot): lea should not be required. The address of | 4985 // TODO(wala,stichnot): lea should not be required. The address of |
| 5070 // the stack slot is known at compile time (although not until after | 4986 // the stack slot is known at compile time (although not until after |
| 5071 // addProlog()). | 4987 // addProlog()). |
| 5072 const Type PointerType = IceType_i32; | 4988 const Type PointerType = IceType_i32; |
| 5073 Variable *Loc = makeReg(PointerType); | 4989 Variable *Loc = makeReg(PointerType); |
| 5074 _lea(Loc, Slot); | 4990 _lea(Loc, Slot); |
| 5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 4991 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 4992 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| 5077 } | 4993 } |
| 5078 | 4994 |
| 5079 // Helper for legalize() to emit the right code to lower an operand to a | 4995 // Helper for legalize() to emit the right code to lower an operand to a |
| 5080 // register of the appropriate type. | 4996 // register of the appropriate type. |
| 5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | 4997 template <class Machine> |
| 4998 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | |
| 5082 Type Ty = Src->getType(); | 4999 Type Ty = Src->getType(); |
| 5083 Variable *Reg = makeReg(Ty, RegNum); | 5000 Variable *Reg = makeReg(Ty, RegNum); |
| 5084 if (isVectorType(Ty)) { | 5001 if (isVectorType(Ty)) { |
| 5085 _movp(Reg, Src); | 5002 _movp(Reg, Src); |
| 5086 } else { | 5003 } else { |
| 5087 _mov(Reg, Src); | 5004 _mov(Reg, Src); |
| 5088 } | 5005 } |
| 5089 return Reg; | 5006 return Reg; |
| 5090 } | 5007 } |
| 5091 | 5008 |
| 5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, | 5009 template <class Machine> |
| 5093 int32_t RegNum) { | 5010 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| 5011 int32_t RegNum) { | |
| 5094 Type Ty = From->getType(); | 5012 Type Ty = From->getType(); |
| 5095 // Assert that a physical register is allowed. To date, all calls | 5013 // Assert that a physical register is allowed. To date, all calls |
| 5096 // to legalize() allow a physical register. If a physical register | 5014 // to legalize() allow a physical register. If a physical register |
| 5097 // needs to be explicitly disallowed, then new code will need to be | 5015 // needs to be explicitly disallowed, then new code will need to be |
| 5098 // written to force a spill. | 5016 // written to force a spill. |
| 5099 assert(Allowed & Legal_Reg); | 5017 assert(Allowed & Legal_Reg); |
| 5100 // If we're asking for a specific physical register, make sure we're | 5018 // If we're asking for a specific physical register, make sure we're |
| 5101 // not allowing any other operand kinds. (This could be future | 5019 // not allowing any other operand kinds. (This could be future |
| 5102 // work, e.g. allow the shl shift amount to be either an immediate | 5020 // work, e.g. allow the shl shift amount to be either an immediate |
| 5103 // or in ecx.) | 5021 // or in ecx.) |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5114 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| 5197 From = copyToReg(From, RegNum); | 5115 From = copyToReg(From, RegNum); |
| 5198 } | 5116 } |
| 5199 return From; | 5117 return From; |
| 5200 } | 5118 } |
| 5201 llvm_unreachable("Unhandled operand kind in legalize()"); | 5119 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 5202 return From; | 5120 return From; |
| 5203 } | 5121 } |
| 5204 | 5122 |
| 5205 // Provide a trivial wrapper to legalize() for this common usage. | 5123 // Provide a trivial wrapper to legalize() for this common usage. |
| 5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { | 5124 template <class Machine> |
| 5125 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { | |
| 5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5126 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 5208 } | 5127 } |
| 5209 | 5128 |
| 5210 // For the cmp instruction, if Src1 is an immediate, or known to be a | 5129 // For the cmp instruction, if Src1 is an immediate, or known to be a |
| 5211 // physical register, we can allow Src0 to be a memory operand. | 5130 // physical register, we can allow Src0 to be a memory operand. |
| 5212 // Otherwise, Src0 must be copied into a physical register. | 5131 // Otherwise, Src0 must be copied into a physical register. |
| 5213 // (Actually, either Src0 or Src1 can be chosen for the physical | 5132 // (Actually, either Src0 or Src1 can be chosen for the physical |
| 5214 // register, but unfortunately we have to commit to one or the other | 5133 // register, but unfortunately we have to commit to one or the other |
| 5215 // before register allocation.) | 5134 // before register allocation.) |
| 5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { | 5135 template <class Machine> |
| 5136 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | |
| 5137 Operand *Src1) { | |
| 5217 bool IsSrc1ImmOrReg = false; | 5138 bool IsSrc1ImmOrReg = false; |
| 5218 if (llvm::isa<Constant>(Src1)) { | 5139 if (llvm::isa<Constant>(Src1)) { |
| 5219 IsSrc1ImmOrReg = true; | 5140 IsSrc1ImmOrReg = true; |
| 5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 5141 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 5221 if (Var->hasReg()) | 5142 if (Var->hasReg()) |
| 5222 IsSrc1ImmOrReg = true; | 5143 IsSrc1ImmOrReg = true; |
| 5223 } | 5144 } |
| 5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 5145 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| 5225 } | 5146 } |
| 5226 | 5147 |
| 5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty, | 5148 template <class Machine> |
| 5228 bool DoLegalize) { | 5149 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, |
| 5150 Type Ty, | |
| 5151 bool DoLegalize) { | |
| 5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); | 5152 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); |
| 5230 // It may be the case that address mode optimization already creates | 5153 // It may be the case that address mode optimization already creates |
| 5231 // an OperandX8632Mem, so in that case it wouldn't need another level | 5154 // an OperandX8632Mem, so in that case it wouldn't need another level |
| 5232 // of transformation. | 5155 // of transformation. |
| 5233 if (!Mem) { | 5156 if (!Mem) { |
| 5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd); | 5157 Variable *Base = llvm::dyn_cast<Variable>(Opnd); |
| 5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); | 5158 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); |
| 5236 assert(Base || Offset); | 5159 assert(Base || Offset); |
| 5237 if (Offset) { | 5160 if (Offset) { |
| 5238 // During memory operand building, we do not blind or pool | 5161 // During memory operand building, we do not blind or pool |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 5250 llvm::isa<ConstantRelocatable>(Offset)); | 5173 llvm::isa<ConstantRelocatable>(Offset)); |
| 5251 } | 5174 } |
| 5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | 5175 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); |
| 5253 } | 5176 } |
| 5254 // Do legalization, which contains randomization/pooling | 5177 // Do legalization, which contains randomization/pooling |
| 5255 // or do randomization/pooling. | 5178 // or do randomization/pooling. |
| 5256 return llvm::cast<OperandX8632Mem>( | 5179 return llvm::cast<OperandX8632Mem>( |
| 5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 5180 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
| 5258 } | 5181 } |
| 5259 | 5182 |
| 5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 5183 template <class Machine> |
| 5184 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | |
| 5261 // There aren't any 64-bit integer registers for x86-32. | 5185 // There aren't any 64-bit integer registers for x86-32. |
| 5262 assert(Type != IceType_i64); | 5186 assert(Type != IceType_i64); |
| 5263 Variable *Reg = Func->makeVariable(Type); | 5187 Variable *Reg = Func->makeVariable(Type); |
| 5264 if (RegNum == Variable::NoRegister) | 5188 if (RegNum == Variable::NoRegister) |
| 5265 Reg->setWeightInfinite(); | 5189 Reg->setWeightInfinite(); |
| 5266 else | 5190 else |
| 5267 Reg->setRegNum(RegNum); | 5191 Reg->setRegNum(RegNum); |
| 5268 return Reg; | 5192 return Reg; |
| 5269 } | 5193 } |
| 5270 | 5194 |
| 5271 void TargetX8632::postLower() { | 5195 template <class Machine> void TargetX86Base<Machine>::postLower() { |
| 5272 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 5196 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 5273 return; | 5197 return; |
| 5274 inferTwoAddress(); | 5198 inferTwoAddress(); |
| 5275 } | 5199 } |
| 5276 | 5200 |
| 5277 void TargetX8632::makeRandomRegisterPermutation( | 5201 template <class Machine> |
| 5202 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | |
| 5278 llvm::SmallVectorImpl<int32_t> &Permutation, | 5203 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 5279 const llvm::SmallBitVector &ExcludeRegisters) const { | 5204 const llvm::SmallBitVector &ExcludeRegisters) const { |
| 5280 // TODO(stichnot): Declaring Permutation this way loses type/size | 5205 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 5281 // information. Fix this in conjunction with the caller-side TODO. | 5206 // information. Fix this in conjunction with the caller-side TODO. |
| 5282 assert(Permutation.size() >= RegX8632::Reg_NUM); | 5207 assert(Permutation.size() >= RegX8632::Reg_NUM); |
| 5283 // Expected upper bound on the number of registers in a single | 5208 // Expected upper bound on the number of registers in a single |
| 5284 // equivalence class. For x86-32, this would comprise the 8 XMM | 5209 // equivalence class. For x86-32, this would comprise the 8 XMM |
| 5285 // registers. This is for performance, not correctness. | 5210 // registers. This is for performance, not correctness. |
| 5286 static const unsigned MaxEquivalenceClassSize = 8; | 5211 static const unsigned MaxEquivalenceClassSize = 8; |
| 5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; | 5212 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5334 if (!First) | 5259 if (!First) |
| 5335 Str << " "; | 5260 Str << " "; |
| 5336 First = false; | 5261 First = false; |
| 5337 Str << getRegName(Register, IceType_i32); | 5262 Str << getRegName(Register, IceType_i32); |
| 5338 } | 5263 } |
| 5339 Str << "}\n"; | 5264 Str << "}\n"; |
| 5340 } | 5265 } |
| 5341 } | 5266 } |
| 5342 } | 5267 } |
| 5343 | 5268 |
| 5344 void TargetX8632::emit(const ConstantInteger32 *C) const { | 5269 template <class Machine> |
| 5270 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | |
| 5345 if (!ALLOW_DUMP) | 5271 if (!ALLOW_DUMP) |
| 5346 return; | 5272 return; |
| 5347 Ostream &Str = Ctx->getStrEmit(); | 5273 Ostream &Str = Ctx->getStrEmit(); |
| 5348 Str << getConstantPrefix() << C->getValue(); | 5274 Str << getConstantPrefix() << C->getValue(); |
| 5349 } | 5275 } |
| 5350 | 5276 |
| 5351 void TargetX8632::emit(const ConstantInteger64 *) const { | 5277 template <class Machine> |
| 5278 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { | |
| 5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 5279 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); |
| 5353 } | 5280 } |
| 5354 | 5281 |
| 5355 void TargetX8632::emit(const ConstantFloat *C) const { | 5282 template <class Machine> |
| 5283 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | |
| 5356 if (!ALLOW_DUMP) | 5284 if (!ALLOW_DUMP) |
| 5357 return; | 5285 return; |
| 5358 Ostream &Str = Ctx->getStrEmit(); | 5286 Ostream &Str = Ctx->getStrEmit(); |
| 5359 C->emitPoolLabel(Str); | 5287 C->emitPoolLabel(Str); |
| 5360 } | 5288 } |
| 5361 | 5289 |
| 5362 void TargetX8632::emit(const ConstantDouble *C) const { | 5290 template <class Machine> |
| 5291 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const { | |
| 5363 if (!ALLOW_DUMP) | 5292 if (!ALLOW_DUMP) |
| 5364 return; | 5293 return; |
| 5365 Ostream &Str = Ctx->getStrEmit(); | 5294 Ostream &Str = Ctx->getStrEmit(); |
| 5366 C->emitPoolLabel(Str); | 5295 C->emitPoolLabel(Str); |
| 5367 } | 5296 } |
| 5368 | 5297 |
| 5369 void TargetX8632::emit(const ConstantUndef *) const { | 5298 template <class Machine> |
| 5299 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | |
| 5370 llvm::report_fatal_error("undef value encountered by emitter."); | 5300 llvm::report_fatal_error("undef value encountered by emitter."); |
| 5371 } | 5301 } |
| 5372 | 5302 |
| 5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) | |
| 5374 : TargetDataLowering(Ctx) {} | |
| 5375 | |
| 5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, | |
| 5377 const IceString &SectionSuffix) { | |
| 5378 switch (Ctx->getFlags().getOutFileType()) { | |
| 5379 case FT_Elf: { | |
| 5380 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | |
| 5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); | |
| 5382 } break; | |
| 5383 case FT_Asm: | |
| 5384 case FT_Iasm: { | |
| 5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); | |
| 5386 OstreamLocker L(Ctx); | |
| 5387 for (const VariableDeclaration *Var : Vars) { | |
| 5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | |
| 5389 emitGlobal(*Var, SectionSuffix); | |
| 5390 } | |
| 5391 } | |
| 5392 } break; | |
| 5393 } | |
| 5394 } | |
| 5395 | |
| 5396 template <typename T> struct PoolTypeConverter {}; | |
| 5397 | |
| 5398 template <> struct PoolTypeConverter<float> { | |
| 5399 typedef uint32_t PrimitiveIntType; | |
| 5400 typedef ConstantFloat IceType; | |
| 5401 static const Type Ty = IceType_f32; | |
| 5402 static const char *TypeName; | |
| 5403 static const char *AsmTag; | |
| 5404 static const char *PrintfString; | |
| 5405 }; | |
| 5406 const char *PoolTypeConverter<float>::TypeName = "float"; | |
| 5407 const char *PoolTypeConverter<float>::AsmTag = ".long"; | |
| 5408 const char *PoolTypeConverter<float>::PrintfString = "0x%x"; | |
| 5409 | |
| 5410 template <> struct PoolTypeConverter<double> { | |
| 5411 typedef uint64_t PrimitiveIntType; | |
| 5412 typedef ConstantDouble IceType; | |
| 5413 static const Type Ty = IceType_f64; | |
| 5414 static const char *TypeName; | |
| 5415 static const char *AsmTag; | |
| 5416 static const char *PrintfString; | |
| 5417 }; | |
| 5418 const char *PoolTypeConverter<double>::TypeName = "double"; | |
| 5419 const char *PoolTypeConverter<double>::AsmTag = ".quad"; | |
| 5420 const char *PoolTypeConverter<double>::PrintfString = "0x%llx"; | |
| 5421 | |
| 5422 // Add converter for int type constant pooling | |
| 5423 template <> struct PoolTypeConverter<uint32_t> { | |
| 5424 typedef uint32_t PrimitiveIntType; | |
| 5425 typedef ConstantInteger32 IceType; | |
| 5426 static const Type Ty = IceType_i32; | |
| 5427 static const char *TypeName; | |
| 5428 static const char *AsmTag; | |
| 5429 static const char *PrintfString; | |
| 5430 }; | |
| 5431 const char *PoolTypeConverter<uint32_t>::TypeName = "i32"; | |
| 5432 const char *PoolTypeConverter<uint32_t>::AsmTag = ".long"; | |
| 5433 const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x"; | |
| 5434 | |
| 5435 // Add converter for int type constant pooling | |
| 5436 template <> struct PoolTypeConverter<uint16_t> { | |
| 5437 typedef uint32_t PrimitiveIntType; | |
| 5438 typedef ConstantInteger32 IceType; | |
| 5439 static const Type Ty = IceType_i16; | |
| 5440 static const char *TypeName; | |
| 5441 static const char *AsmTag; | |
| 5442 static const char *PrintfString; | |
| 5443 }; | |
| 5444 const char *PoolTypeConverter<uint16_t>::TypeName = "i16"; | |
| 5445 const char *PoolTypeConverter<uint16_t>::AsmTag = ".short"; | |
| 5446 const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x"; | |
| 5447 | |
| 5448 // Add converter for int type constant pooling | |
| 5449 template <> struct PoolTypeConverter<uint8_t> { | |
| 5450 typedef uint32_t PrimitiveIntType; | |
| 5451 typedef ConstantInteger32 IceType; | |
| 5452 static const Type Ty = IceType_i8; | |
| 5453 static const char *TypeName; | |
| 5454 static const char *AsmTag; | |
| 5455 static const char *PrintfString; | |
| 5456 }; | |
| 5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; | |
| 5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; | |
| 5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; | |
| 5460 | |
| 5461 template <typename T> | |
| 5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { | |
| 5463 if (!ALLOW_DUMP) | |
| 5464 return; | |
| 5465 Ostream &Str = Ctx->getStrEmit(); | |
| 5466 Type Ty = T::Ty; | |
| 5467 SizeT Align = typeAlignInBytes(Ty); | |
| 5468 ConstantList Pool = Ctx->getConstantPool(Ty); | |
| 5469 | |
| 5470 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align | |
| 5471 << "\n"; | |
| 5472 Str << "\t.align\t" << Align << "\n"; | |
| 5473 for (Constant *C : Pool) { | |
| 5474 if (!C->getShouldBePooled()) | |
| 5475 continue; | |
| 5476 typename T::IceType *Const = llvm::cast<typename T::IceType>(C); | |
| 5477 typename T::IceType::PrimType Value = Const->getValue(); | |
| 5478 // Use memcpy() to copy bits from Value into RawValue in a way | |
| 5479 // that avoids breaking strict-aliasing rules. | |
| 5480 typename T::PrimitiveIntType RawValue; | |
| 5481 memcpy(&RawValue, &Value, sizeof(Value)); | |
| 5482 char buf[30]; | |
| 5483 int CharsPrinted = | |
| 5484 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue); | |
| 5485 assert(CharsPrinted >= 0 && | |
| 5486 (size_t)CharsPrinted < llvm::array_lengthof(buf)); | |
| 5487 (void)CharsPrinted; // avoid warnings if asserts are disabled | |
| 5488 Const->emitPoolLabel(Str); | |
| 5489 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " " | |
| 5490 << Value << "\n"; | |
| 5491 } | |
| 5492 } | |
| 5493 | |
| 5494 void TargetDataX8632::lowerConstants() { | |
| 5495 if (Ctx->getFlags().getDisableTranslation()) | |
| 5496 return; | |
| 5497 // No need to emit constants from the int pool since (for x86) they | |
| 5498 // are embedded as immediates in the instructions, just emit float/double. | |
| 5499 switch (Ctx->getFlags().getOutFileType()) { | |
| 5500 case FT_Elf: { | |
| 5501 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | |
| 5502 | |
| 5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8); | |
| 5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16); | |
| 5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32); | |
| 5506 | |
| 5507 Writer->writeConstantPool<ConstantFloat>(IceType_f32); | |
| 5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64); | |
| 5509 } break; | |
| 5510 case FT_Asm: | |
| 5511 case FT_Iasm: { | |
| 5512 OstreamLocker L(Ctx); | |
| 5513 | |
| 5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx); | |
| 5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx); | |
| 5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx); | |
| 5517 | |
| 5518 emitConstantPool<PoolTypeConverter<float>>(Ctx); | |
| 5519 emitConstantPool<PoolTypeConverter<double>>(Ctx); | |
| 5520 } break; | |
| 5521 } | |
| 5522 } | |
| 5523 | |
| 5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | |
| 5525 : TargetHeaderLowering(Ctx) {} | |
| 5526 | |
| 5527 // Randomize or pool an Immediate. | 5303 // Randomize or pool an Immediate. |
| 5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate, | 5304 template <class Machine> |
| 5529 int32_t RegNum) { | 5305 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| 5306 int32_t RegNum) { | |
| 5530 assert(llvm::isa<ConstantInteger32>(Immediate) || | 5307 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 5531 llvm::isa<ConstantRelocatable>(Immediate)); | 5308 llvm::isa<ConstantRelocatable>(Immediate)); |
| 5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5309 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5533 RandomizationPoolingPaused == true) { | 5310 RandomizationPoolingPaused == true) { |
| 5534 // Immediates randomization/pooling off or paused | 5311 // Immediates randomization/pooling off or paused |
| 5535 return Immediate; | 5312 return Immediate; |
| 5536 } | 5313 } |
| 5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { | 5314 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { |
| 5538 Ctx->statsUpdateRPImms(); | 5315 Ctx->statsUpdateRPImms(); |
| 5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5316 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); | 5372 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); |
| 5596 _mov(Reg, MemOperand); | 5373 _mov(Reg, MemOperand); |
| 5597 return Reg; | 5374 return Reg; |
| 5598 } | 5375 } |
| 5599 assert("Unsupported -randomize-pool-immediates option" && false); | 5376 assert("Unsupported -randomize-pool-immediates option" && false); |
| 5600 } | 5377 } |
| 5601 // the constant Immediate is not eligible for blinding/pooling | 5378 // the constant Immediate is not eligible for blinding/pooling |
| 5602 return Immediate; | 5379 return Immediate; |
| 5603 } | 5380 } |
| 5604 | 5381 |
| 5382 template <class Machine> | |
| 5605 OperandX8632Mem * | 5383 OperandX8632Mem * |
| 5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, | 5384 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, |
| 5607 int32_t RegNum) { | 5385 int32_t RegNum) { |
| 5608 assert(MemOperand); | 5386 assert(MemOperand); |
| 5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5387 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5610 RandomizationPoolingPaused == true) { | 5388 RandomizationPoolingPaused == true) { |
| 5611 // immediates randomization/pooling is turned off | 5389 // immediates randomization/pooling is turned off |
| 5612 return MemOperand; | 5390 return MemOperand; |
| 5613 } | 5391 } |
| 5614 | 5392 |
| 5615 // If this memory operand is already a randommized one, we do | 5393 // If this memory operand is already a randommized one, we do |
| 5616 // not randomize it again. | 5394 // not randomize it again. |
| 5617 if (MemOperand->getRandomized()) | 5395 if (MemOperand->getRandomized()) |
| 5618 return MemOperand; | 5396 return MemOperand; |
| 5619 | 5397 |
| 5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { | 5398 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { |
| 5621 if (C->shouldBeRandomizedOrPooled(Ctx)) { | 5399 if (C->shouldBeRandomizedOrPooled(Ctx)) { |
| 5622 // The offset of this mem operand should be blinded or pooled | 5400 // The offset of this mem operand should be blinded or pooled |
| 5623 Ctx->statsUpdateRPImms(); | 5401 Ctx->statsUpdateRPImms(); |
| 5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5402 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| 5625 RPI_Randomize) { | 5403 RPI_Randomize) { |
| 5626 // blind the constant offset | 5404 // blind the constant offset |
| 5627 // FROM: | 5405 // FROM: |
| 5628 // offset[base, index, shift] | 5406 // offset[base, index, shift] |
| 5629 // TO: | 5407 // TO: |
| 5630 // insert: lea offset+cookie[base], RegTemp | 5408 // insert: lea offset+cookie[base], RegTemp |
| 5631 // => -cookie[RegTemp, index, shift] | 5409 // => -cookie[RegTemp, index, shift] |
| 5632 uint32_t Value = | 5410 uint32_t Value = llvm::dyn_cast<ConstantInteger32>( |
| 5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) | 5411 MemOperand->getOffset())->getValue(); |
| 5634 ->getValue(); | |
| 5635 uint32_t Cookie = Ctx->getRandomizationCookie(); | 5412 uint32_t Cookie = Ctx->getRandomizationCookie(); |
| 5636 Constant *Mask1 = Ctx->getConstantInt( | 5413 Constant *Mask1 = Ctx->getConstantInt( |
| 5637 MemOperand->getOffset()->getType(), Cookie + Value); | 5414 MemOperand->getOffset()->getType(), Cookie + Value); |
| 5638 Constant *Mask2 = | 5415 Constant *Mask2 = |
| 5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 5416 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
| 5640 | 5417 |
| 5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( | 5418 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( |
| 5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); | 5419 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); |
| 5643 // If we have already assigned a physical register, we must come from | 5420 // If we have already assigned a physical register, we must come from |
| 5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5421 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5710 return NewMemOperand; | 5487 return NewMemOperand; |
| 5711 } | 5488 } |
| 5712 assert("Unsupported -randomize-pool-immediates option" && false); | 5489 assert("Unsupported -randomize-pool-immediates option" && false); |
| 5713 } | 5490 } |
| 5714 } | 5491 } |
| 5715 // the offset is not eligible for blinding or pooling, return the original | 5492 // the offset is not eligible for blinding or pooling, return the original |
| 5716 // mem operand | 5493 // mem operand |
| 5717 return MemOperand; | 5494 return MemOperand; |
| 5718 } | 5495 } |
| 5719 | 5496 |
| 5497 } // end of namespace X86Internal | |
| 5720 } // end of namespace Ice | 5498 } // end of namespace Ice |
| OLD | NEW |