Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering *- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX86Base class, which |
| 11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. | 12 // high-level instruction. |
| 13 // | 13 // |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | |
| 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | |
| 18 | |
| 16 #include "llvm/Support/MathExtras.h" | 19 #include "llvm/Support/MathExtras.h" |
| 17 | 20 |
| 18 #include "IceCfg.h" | 21 #include "IceCfg.h" |
| 19 #include "IceCfgNode.h" | 22 #include "IceCfgNode.h" |
| 20 #include "IceClFlags.h" | 23 #include "IceClFlags.h" |
| 21 #include "IceDefs.h" | 24 #include "IceDefs.h" |
| 22 #include "IceELFObjectWriter.h" | 25 #include "IceELFObjectWriter.h" |
| 23 #include "IceGlobalInits.h" | 26 #include "IceGlobalInits.h" |
| 24 #include "IceInstX8632.h" | 27 #include "IceInstX8632.h" |
| 25 #include "IceLiveness.h" | 28 #include "IceLiveness.h" |
| 26 #include "IceOperand.h" | 29 #include "IceOperand.h" |
| 27 #include "IceRegistersX8632.h" | 30 #include "IceRegistersX8632.h" |
| 28 #include "IceTargetLoweringX8632.def" | 31 #include "IceTargetLoweringX8632.def" |
| 29 #include "IceTargetLoweringX8632.h" | 32 #include "IceTargetLoweringX8632.h" |
| 30 #include "IceUtils.h" | 33 #include "IceUtils.h" |
| 31 | 34 |
| 32 namespace Ice { | 35 namespace Ice { |
| 33 | 36 namespace X86Internal { |
| 34 namespace { | |
| 35 | |
| 36 // The following table summarizes the logic for lowering the fcmp | |
| 37 // instruction. There is one table entry for each of the 16 conditions. | |
| 38 // | |
| 39 // The first four columns describe the case when the operands are | |
| 40 // floating point scalar values. A comment in lowerFcmp() describes the | |
| 41 // lowering template. In the most general case, there is a compare | |
| 42 // followed by two conditional branches, because some fcmp conditions | |
| 43 // don't map to a single x86 conditional branch. However, in many cases | |
| 44 // it is possible to swap the operands in the comparison and have a | |
| 45 // single conditional branch. Since it's quite tedious to validate the | |
| 46 // table by hand, good execution tests are helpful. | |
| 47 // | |
| 48 // The last two columns describe the case when the operands are vectors | |
| 49 // of floating point values. For most fcmp conditions, there is a clear | |
| 50 // mapping to a single x86 cmpps instruction variant. Some fcmp | |
| 51 // conditions require special code to handle and these are marked in the | |
| 52 // table with a Cmpps_Invalid predicate. | |
| 53 const struct TableFcmp_ { | |
| 54 uint32_t Default; | |
| 55 bool SwapScalarOperands; | |
| 56 CondX86::BrCond C1, C2; | |
| 57 bool SwapVectorOperands; | |
| 58 CondX86::CmppsCond Predicate; | |
| 59 } TableFcmp[] = { | |
| 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | |
| 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ | |
| 62 , | |
| 63 FCMPX8632_TABLE | |
| 64 #undef X | |
| 65 }; | |
| 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | |
| 67 | |
| 68 // The following table summarizes the logic for lowering the icmp instruction | |
| 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | |
| 70 // x86 conditional branch instruction. | |
| 71 | |
| 72 const struct TableIcmp32_ { | |
| 73 CondX86::BrCond Mapping; | |
| 74 } TableIcmp32[] = { | |
| 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 76 { CondX86::C_32 } \ | |
| 77 , | |
| 78 ICMPX8632_TABLE | |
| 79 #undef X | |
| 80 }; | |
| 81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); | |
| 82 | |
| 83 // The following table summarizes the logic for lowering the icmp instruction | |
| 84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | |
| 85 // conditional branches are needed. For the other conditions, three separate | |
| 86 // conditional branches are needed. | |
| 87 const struct TableIcmp64_ { | |
| 88 CondX86::BrCond C1, C2, C3; | |
| 89 } TableIcmp64[] = { | |
| 90 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \ | |
| 92 , | |
| 93 ICMPX8632_TABLE | |
| 94 #undef X | |
| 95 }; | |
| 96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | |
| 97 | |
| 98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | |
| 99 size_t Index = static_cast<size_t>(Cond); | |
| 100 assert(Index < TableIcmp32Size); | |
| 101 return TableIcmp32[Index].Mapping; | |
| 102 } | |
| 103 | |
| 104 const struct TableTypeX8632Attributes_ { | |
| 105 Type InVectorElementType; | |
| 106 } TableTypeX8632Attributes[] = { | |
| 107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | |
| 108 { elementty } \ | |
| 109 , | |
| 110 ICETYPEX8632_TABLE | |
| 111 #undef X | |
| 112 }; | |
| 113 const size_t TableTypeX8632AttributesSize = | |
| 114 llvm::array_lengthof(TableTypeX8632Attributes); | |
| 115 | |
| 116 // Return the type which the elements of the vector have in the X86 | |
| 117 // representation of the vector. | |
| 118 Type getInVectorElementType(Type Ty) { | |
| 119 assert(isVectorType(Ty)); | |
| 120 size_t Index = static_cast<size_t>(Ty); | |
| 121 (void)Index; | |
| 122 assert(Index < TableTypeX8632AttributesSize); | |
| 123 return TableTypeX8632Attributes[Ty].InVectorElementType; | |
| 124 } | |
| 125 | |
| 126 // The maximum number of arguments to pass in XMM registers | |
| 127 const uint32_t X86_MAX_XMM_ARGS = 4; | |
| 128 // The number of bits in a byte | |
| 129 const uint32_t X86_CHAR_BIT = 8; | |
| 130 // Stack alignment | |
| 131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | |
| 132 // Size of the return address on the stack | |
| 133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | |
| 134 // The number of different NOP instructions | |
| 135 const uint32_t X86_NUM_NOP_VARIANTS = 5; | |
| 136 | |
| 137 // Value is in bytes. Return Value adjusted to the next highest multiple | |
| 138 // of the stack alignment. | |
| 139 uint32_t applyStackAlignment(uint32_t Value) { | |
| 140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | |
| 141 } | |
| 142 | |
| 143 // In some cases, there are x-macros tables for both high-level and | |
| 144 // low-level instructions/operands that use the same enum key value. | |
| 145 // The tables are kept separate to maintain a proper separation | |
| 146 // between abstraction layers. There is a risk that the tables could | |
| 147 // get out of sync if enum values are reordered or if entries are | |
| 148 // added or deleted. The following dummy namespaces use | |
| 149 // static_asserts to ensure everything is kept in sync. | |
| 150 | |
| 151 // Validate the enum values in FCMPX8632_TABLE. | |
| 152 namespace dummy1 { | |
| 153 // Define a temporary set of enum values based on low-level table | |
| 154 // entries. | |
| 155 enum _tmp_enum { | |
| 156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | |
| 157 FCMPX8632_TABLE | |
| 158 #undef X | |
| 159 _num | |
| 160 }; | |
| 161 // Define a set of constants based on high-level table entries. | |
| 162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | |
| 163 ICEINSTFCMP_TABLE | |
| 164 #undef X | |
| 165 // Define a set of constants based on low-level table entries, and | |
| 166 // ensure the table entry keys are consistent. | |
| 167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | |
| 168 static const int _table2_##val = _tmp_##val; \ | |
| 169 static_assert( \ | |
| 170 _table1_##val == _table2_##val, \ | |
| 171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 172 FCMPX8632_TABLE | |
| 173 #undef X | |
| 174 // Repeat the static asserts with respect to the high-level table | |
| 175 // entries in case the high-level table has extra entries. | |
| 176 #define X(tag, str) \ | |
| 177 static_assert( \ | |
| 178 _table1_##tag == _table2_##tag, \ | |
| 179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 180 ICEINSTFCMP_TABLE | |
| 181 #undef X | |
| 182 } // end of namespace dummy1 | |
| 183 | |
| 184 // Validate the enum values in ICMPX8632_TABLE. | |
| 185 namespace dummy2 { | |
| 186 // Define a temporary set of enum values based on low-level table | |
| 187 // entries. | |
| 188 enum _tmp_enum { | |
| 189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | |
| 190 ICMPX8632_TABLE | |
| 191 #undef X | |
| 192 _num | |
| 193 }; | |
| 194 // Define a set of constants based on high-level table entries. | |
| 195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | |
| 196 ICEINSTICMP_TABLE | |
| 197 #undef X | |
| 198 // Define a set of constants based on low-level table entries, and | |
| 199 // ensure the table entry keys are consistent. | |
| 200 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 201 static const int _table2_##val = _tmp_##val; \ | |
| 202 static_assert( \ | |
| 203 _table1_##val == _table2_##val, \ | |
| 204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 205 ICMPX8632_TABLE | |
| 206 #undef X | |
| 207 // Repeat the static asserts with respect to the high-level table | |
| 208 // entries in case the high-level table has extra entries. | |
| 209 #define X(tag, str) \ | |
| 210 static_assert( \ | |
| 211 _table1_##tag == _table2_##tag, \ | |
| 212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 213 ICEINSTICMP_TABLE | |
| 214 #undef X | |
| 215 } // end of namespace dummy2 | |
| 216 | |
| 217 // Validate the enum values in ICETYPEX8632_TABLE. | |
| 218 namespace dummy3 { | |
| 219 // Define a temporary set of enum values based on low-level table | |
| 220 // entries. | |
| 221 enum _tmp_enum { | |
| 222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, | |
| 223 ICETYPEX8632_TABLE | |
| 224 #undef X | |
| 225 _num | |
| 226 }; | |
| 227 // Define a set of constants based on high-level table entries. | |
| 228 #define X(tag, size, align, elts, elty, str) \ | |
| 229 static const int _table1_##tag = tag; | |
| 230 ICETYPE_TABLE | |
| 231 #undef X | |
| 232 // Define a set of constants based on low-level table entries, and | |
| 233 // ensure the table entry keys are consistent. | |
| 234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | |
| 235 static const int _table2_##tag = _tmp_##tag; \ | |
| 236 static_assert(_table1_##tag == _table2_##tag, \ | |
| 237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 238 ICETYPEX8632_TABLE | |
| 239 #undef X | |
| 240 // Repeat the static asserts with respect to the high-level table | |
| 241 // entries in case the high-level table has extra entries. | |
| 242 #define X(tag, size, align, elts, elty, str) \ | |
| 243 static_assert(_table1_##tag == _table2_##tag, \ | |
| 244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 245 ICETYPE_TABLE | |
| 246 #undef X | |
| 247 } // end of namespace dummy3 | |
| 248 | 37 |
| 249 // A helper class to ease the settings of RandomizationPoolingPause | 38 // A helper class to ease the settings of RandomizationPoolingPause |
| 250 // to disable constant blinding or pooling for some translation phases. | 39 // to disable constant blinding or pooling for some translation phases. |
| 251 class BoolFlagSaver { | 40 class BoolFlagSaver { |
| 252 BoolFlagSaver() = delete; | 41 BoolFlagSaver() = delete; |
| 253 BoolFlagSaver(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver(const BoolFlagSaver &) = delete; |
| 254 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
| 255 | 44 |
| 256 public: | 45 public: |
| 257 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
| 258 ~BoolFlagSaver() { Flag = OldValue; } | 47 ~BoolFlagSaver() { Flag = OldValue; } |
| 259 | 48 |
| 260 private: | 49 private: |
| 261 const bool OldValue; | 50 const bool OldValue; |
| 262 bool &Flag; | 51 bool &Flag; |
| 263 }; | 52 }; |
| 264 | 53 |
| 265 } // end of anonymous namespace | 54 template <class MachineTraits> class BoolFoldingEntry { |
| 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | |
| 266 | 56 |
| 267 BoolFoldingEntry::BoolFoldingEntry(Inst *I) | 57 public: |
| 268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {} | 58 BoolFoldingEntry() = default; |
| 59 explicit BoolFoldingEntry(Inst *I); | |
| 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | |
| 61 // Instr is the instruction producing the i1-type variable of interest. | |
| 62 Inst *Instr = nullptr; | |
| 63 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | |
| 64 bool IsComplex = false; | |
| 65 // IsLiveOut is initialized conservatively to true, and is set to false when | |
| 66 // we encounter an instruction that ends Var's live range. We disable the | |
| 67 // folding optimization when Var is live beyond this basic block. Note that | |
| 68 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | |
| 69 // always be true and the folding optimization will never be performed. | |
| 70 bool IsLiveOut = true; | |
| 71 // NumUses counts the number of times Var is used as a source operand in the | |
| 72 // basic block. If IsComplex is true and there is more than one use of Var, | |
| 73 // then the folding optimization is disabled for Var. | |
| 74 uint32_t NumUses = 0; | |
| 75 }; | |
| 269 | 76 |
| 270 BoolFolding::BoolFoldingProducerKind | 77 template <class MachineTraits> class BoolFolding { |
| 271 BoolFolding::getProducerKind(const Inst *Instr) { | 78 public: |
| 79 enum BoolFoldingProducerKind { | |
| 80 PK_None, | |
| 81 PK_Icmp32, | |
| 82 PK_Icmp64, | |
| 83 PK_Fcmp, | |
| 84 PK_Trunc | |
| 85 }; | |
| 86 | |
| 87 // Currently the actual enum values are not used (other than CK_None), but we | |
| 88 // go | |
| 89 // ahead and produce them anyway for symmetry with the | |
| 90 // BoolFoldingProducerKind. | |
| 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | |
| 92 | |
| 93 private: | |
| 94 BoolFolding(const BoolFolding &) = delete; | |
| 95 BoolFolding &operator=(const BoolFolding &) = delete; | |
| 96 | |
| 97 public: | |
| 98 BoolFolding() = default; | |
| 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | |
| 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | |
| 101 static bool hasComplexLowering(const Inst *Instr); | |
| 102 void init(CfgNode *Node); | |
| 103 const Inst *getProducerFor(const Operand *Opnd) const; | |
| 104 void dump(const Cfg *Func) const; | |
| 105 | |
| 106 private: | |
| 107 // Returns true if Producers contains a valid entry for the given VarNum. | |
| 108 bool containsValid(SizeT VarNum) const { | |
| 109 auto Element = Producers.find(VarNum); | |
| 110 return Element != Producers.end() && Element->second.Instr != nullptr; | |
| 111 } | |
| 112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | |
| 113 // Producers maps Variable::Number to a BoolFoldingEntry. | |
| 114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | |
| 115 }; | |
| 116 | |
| 117 template <class MachineTraits> | |
| 118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | |
| 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | |
| 120 | |
| 121 template <class MachineTraits> | |
| 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | |
| 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | |
| 272 if (llvm::isa<InstIcmp>(Instr)) { | 124 if (llvm::isa<InstIcmp>(Instr)) { |
| 273 if (Instr->getSrc(0)->getType() != IceType_i64) | 125 if (Instr->getSrc(0)->getType() != IceType_i64) |
| 274 return PK_Icmp32; | 126 return PK_Icmp32; |
| 275 return PK_None; // TODO(stichnot): actually PK_Icmp64; | 127 return PK_None; // TODO(stichnot): actually PK_Icmp64; |
| 276 } | 128 } |
| 277 return PK_None; // TODO(stichnot): remove this | 129 return PK_None; // TODO(stichnot): remove this |
| 278 | 130 |
| 279 if (llvm::isa<InstFcmp>(Instr)) | 131 if (llvm::isa<InstFcmp>(Instr)) |
| 280 return PK_Fcmp; | 132 return PK_Fcmp; |
| 281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 133 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 282 switch (Cast->getCastKind()) { | 134 switch (Cast->getCastKind()) { |
| 283 default: | 135 default: |
| 284 return PK_None; | 136 return PK_None; |
| 285 case InstCast::Trunc: | 137 case InstCast::Trunc: |
| 286 return PK_Trunc; | 138 return PK_Trunc; |
| 287 } | 139 } |
| 288 } | 140 } |
| 289 return PK_None; | 141 return PK_None; |
| 290 } | 142 } |
| 291 | 143 |
| 292 BoolFolding::BoolFoldingConsumerKind | 144 template <class MachineTraits> |
| 293 BoolFolding::getConsumerKind(const Inst *Instr) { | 145 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind |
| 146 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { | |
| 294 if (llvm::isa<InstBr>(Instr)) | 147 if (llvm::isa<InstBr>(Instr)) |
| 295 return CK_Br; | 148 return CK_Br; |
| 296 if (llvm::isa<InstSelect>(Instr)) | 149 if (llvm::isa<InstSelect>(Instr)) |
| 297 return CK_Select; | 150 return CK_Select; |
| 298 return CK_None; // TODO(stichnot): remove this | 151 return CK_None; // TODO(stichnot): remove this |
| 299 | 152 |
| 300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 153 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 301 switch (Cast->getCastKind()) { | 154 switch (Cast->getCastKind()) { |
| 302 default: | 155 default: |
| 303 return CK_None; | 156 return CK_None; |
| 304 case InstCast::Sext: | 157 case InstCast::Sext: |
| 305 return CK_Sext; | 158 return CK_Sext; |
| 306 case InstCast::Zext: | 159 case InstCast::Zext: |
| 307 return CK_Zext; | 160 return CK_Zext; |
| 308 } | 161 } |
| 309 } | 162 } |
| 310 return CK_None; | 163 return CK_None; |
| 311 } | 164 } |
| 312 | 165 |
| 313 // Returns true if the producing instruction has a "complex" lowering | 166 // Returns true if the producing instruction has a "complex" lowering |
| 314 // sequence. This generally means that its lowering sequence requires | 167 // sequence. This generally means that its lowering sequence requires |
| 315 // more than one conditional branch, namely 64-bit integer compares | 168 // more than one conditional branch, namely 64-bit integer compares |
| 316 // and some floating-point compares. When this is true, and there is | 169 // and some floating-point compares. When this is true, and there is |
| 317 // more than one consumer, we prefer to disable the folding | 170 // more than one consumer, we prefer to disable the folding |
| 318 // optimization because it minimizes branches. | 171 // optimization because it minimizes branches. |
| 319 bool BoolFolding::hasComplexLowering(const Inst *Instr) { | 172 template <class MachineTraits> |
| 173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | |
| 320 switch (getProducerKind(Instr)) { | 174 switch (getProducerKind(Instr)) { |
| 321 default: | 175 default: |
| 322 return false; | 176 return false; |
| 323 case PK_Icmp64: | 177 case PK_Icmp64: |
| 324 return true; | 178 return true; |
| 325 case PK_Fcmp: | 179 case PK_Fcmp: |
| 326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != | 180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 327 CondX86::Br_None; | 181 .C2 != CondX86::Br_None; |
| 328 } | 182 } |
| 329 } | 183 } |
| 330 | 184 |
| 331 void BoolFolding::init(CfgNode *Node) { | 185 template <class MachineTraits> |
| 186 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | |
| 332 Producers.clear(); | 187 Producers.clear(); |
| 333 for (Inst &Instr : Node->getInsts()) { | 188 for (Inst &Instr : Node->getInsts()) { |
| 334 // Check whether Instr is a valid producer. | 189 // Check whether Instr is a valid producer. |
| 335 Variable *Var = Instr.getDest(); | 190 Variable *Var = Instr.getDest(); |
| 336 if (!Instr.isDeleted() // only consider non-deleted instructions | 191 if (!Instr.isDeleted() // only consider non-deleted instructions |
| 337 && Var // only instructions with an actual dest var | 192 && Var // only instructions with an actual dest var |
| 338 && Var->getType() == IceType_i1 // only bool-type dest vars | 193 && Var->getType() == IceType_i1 // only bool-type dest vars |
| 339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | 194 && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
| 340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); | 195 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); |
| 341 } | 196 } |
| 342 // Check each src variable against the map. | 197 // Check each src variable against the map. |
| 343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { | 198 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { |
| 344 Operand *Src = Instr.getSrc(I); | 199 Operand *Src = Instr.getSrc(I); |
| 345 SizeT NumVars = Src->getNumVars(); | 200 SizeT NumVars = Src->getNumVars(); |
| 346 for (SizeT J = 0; J < NumVars; ++J) { | 201 for (SizeT J = 0; J < NumVars; ++J) { |
| 347 const Variable *Var = Src->getVar(J); | 202 const Variable *Var = Src->getVar(J); |
| 348 SizeT VarNum = Var->getIndex(); | 203 SizeT VarNum = Var->getIndex(); |
| 349 if (containsValid(VarNum)) { | 204 if (containsValid(VarNum)) { |
| 350 if (I != 0 // All valid consumers use Var as the first source operand | 205 if (I != 0 // All valid consumers use Var as the first source operand |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 372 continue; | 227 continue; |
| 373 } | 228 } |
| 374 // Mark as "dead" rather than outright deleting. This is so that | 229 // Mark as "dead" rather than outright deleting. This is so that |
| 375 // other peephole style optimizations during or before lowering | 230 // other peephole style optimizations during or before lowering |
| 376 // have access to this instruction in undeleted form. See for | 231 // have access to this instruction in undeleted form. See for |
| 377 // example tryOptimizedCmpxchgCmpBr(). | 232 // example tryOptimizedCmpxchgCmpBr(). |
| 378 I.second.Instr->setDead(); | 233 I.second.Instr->setDead(); |
| 379 } | 234 } |
| 380 } | 235 } |
| 381 | 236 |
| 382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { | 237 template <class MachineTraits> |
| 238 const Inst * | |
| 239 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { | |
| 383 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | 240 auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
| 384 if (Var == nullptr) | 241 if (Var == nullptr) |
| 385 return nullptr; | 242 return nullptr; |
| 386 SizeT VarNum = Var->getIndex(); | 243 SizeT VarNum = Var->getIndex(); |
| 387 auto Element = Producers.find(VarNum); | 244 auto Element = Producers.find(VarNum); |
| 388 if (Element == Producers.end()) | 245 if (Element == Producers.end()) |
| 389 return nullptr; | 246 return nullptr; |
| 390 return Element->second.Instr; | 247 return Element->second.Instr; |
| 391 } | 248 } |
| 392 | 249 |
| 393 void BoolFolding::dump(const Cfg *Func) const { | 250 template <class MachineTraits> |
| 251 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const { | |
| 394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) | 252 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) |
| 395 return; | 253 return; |
| 396 OstreamLocker L(Func->getContext()); | 254 OstreamLocker L(Func->getContext()); |
| 397 Ostream &Str = Func->getContext()->getStrDump(); | 255 Ostream &Str = Func->getContext()->getStrDump(); |
| 398 for (auto &I : Producers) { | 256 for (auto &I : Producers) { |
| 399 if (I.second.Instr == nullptr) | 257 if (I.second.Instr == nullptr) |
| 400 continue; | 258 continue; |
| 401 Str << "Found foldable producer:\n "; | 259 Str << "Found foldable producer:\n "; |
| 402 I.second.Instr->dump(Func); | 260 I.second.Instr->dump(Func); |
| 403 Str << "\n"; | 261 Str << "\n"; |
| 404 } | 262 } |
| 405 } | 263 } |
| 406 | 264 |
| 407 void TargetX8632::initNodeForLowering(CfgNode *Node) { | 265 template <class Machine> |
| 266 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) { | |
| 408 FoldingInfo.init(Node); | 267 FoldingInfo.init(Node); |
| 409 FoldingInfo.dump(Func); | 268 FoldingInfo.dump(Func); |
| 410 } | 269 } |
| 411 | 270 |
| 412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) { | 271 template <class Machine> |
| 413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == | 272 TargetX86Base<Machine>::TargetX86Base(Cfg *Func) |
| 414 (TargetInstructionSet::X86InstructionSet_End - | 273 : Machine(Func) { |
| 415 TargetInstructionSet::X86InstructionSet_Begin), | 274 static_assert( |
| 416 "X86InstructionSet range different from TargetInstructionSet"); | 275 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) == |
| 276 (TargetInstructionSet::X86InstructionSet_End - | |
| 277 TargetInstructionSet::X86InstructionSet_Begin), | |
| 278 "Traits::InstructionSet range different from TargetInstructionSet"); | |
| 417 if (Func->getContext()->getFlags().getTargetInstructionSet() != | 279 if (Func->getContext()->getFlags().getTargetInstructionSet() != |
| 418 TargetInstructionSet::BaseInstructionSet) { | 280 TargetInstructionSet::BaseInstructionSet) { |
| 419 InstructionSet = static_cast<X86InstructionSet>( | 281 InstructionSet = static_cast<typename Traits::InstructionSet>( |
| 420 (Func->getContext()->getFlags().getTargetInstructionSet() - | 282 (Func->getContext()->getFlags().getTargetInstructionSet() - |
| 421 TargetInstructionSet::X86InstructionSet_Begin) + | 283 TargetInstructionSet::X86InstructionSet_Begin) + |
| 422 X86InstructionSet::Begin); | 284 Traits::InstructionSet::Begin); |
| 423 } | 285 } |
| 424 // TODO: Don't initialize IntegerRegisters and friends every time. | 286 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 425 // Instead, initialize in some sort of static initializer for the | 287 // Instead, initialize in some sort of static initializer for the |
| 426 // class. | 288 // class. |
| 427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | 289 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); |
| 428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | 290 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); |
| 429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | 291 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); |
| 430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | 292 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); |
| 431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | 293 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); |
| 432 ScratchRegs.resize(RegX8632::Reg_NUM); | 294 ScratchRegs.resize(RegX8632::Reg_NUM); |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 449 TypeToRegisterSet[IceType_f64] = FloatRegisters; | 311 TypeToRegisterSet[IceType_f64] = FloatRegisters; |
| 450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | 312 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; |
| 451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | 313 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; |
| 452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | 314 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; |
| 453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | 315 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; |
| 454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | 316 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; |
| 455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | 317 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; |
| 456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | 318 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
| 457 } | 319 } |
| 458 | 320 |
| 459 void TargetX8632::translateO2() { | 321 template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| 460 TimerMarker T(TimerStack::TT_O2, Func); | 322 TimerMarker T(TimerStack::TT_O2, Func); |
| 461 | 323 |
| 462 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 324 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
| 463 // Lower Phi instructions. | 325 // Lower Phi instructions. |
| 464 Func->placePhiLoads(); | 326 Func->placePhiLoads(); |
| 465 if (Func->hasError()) | 327 if (Func->hasError()) |
| 466 return; | 328 return; |
| 467 Func->placePhiStores(); | 329 Func->placePhiStores(); |
| 468 if (Func->hasError()) | 330 if (Func->hasError()) |
| 469 return; | 331 return; |
| (...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 561 // needed for searching for opportunities. | 423 // needed for searching for opportunities. |
| 562 Func->doBranchOpt(); | 424 Func->doBranchOpt(); |
| 563 Func->dump("After branch optimization"); | 425 Func->dump("After branch optimization"); |
| 564 | 426 |
| 565 // Nop insertion | 427 // Nop insertion |
| 566 if (Ctx->getFlags().shouldDoNopInsertion()) { | 428 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 567 Func->doNopInsertion(); | 429 Func->doNopInsertion(); |
| 568 } | 430 } |
| 569 } | 431 } |
| 570 | 432 |
| 571 void TargetX8632::translateOm1() { | 433 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
| 572 TimerMarker T(TimerStack::TT_Om1, Func); | 434 TimerMarker T(TimerStack::TT_Om1, Func); |
| 573 | 435 |
| 574 Func->placePhiLoads(); | 436 Func->placePhiLoads(); |
| 575 if (Func->hasError()) | 437 if (Func->hasError()) |
| 576 return; | 438 return; |
| 577 Func->placePhiStores(); | 439 Func->placePhiStores(); |
| 578 if (Func->hasError()) | 440 if (Func->hasError()) |
| 579 return; | 441 return; |
| 580 Func->deletePhis(); | 442 Func->deletePhis(); |
| 581 if (Func->hasError()) | 443 if (Func->hasError()) |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 598 if (Func->hasError()) | 460 if (Func->hasError()) |
| 599 return; | 461 return; |
| 600 Func->dump("After stack frame mapping"); | 462 Func->dump("After stack frame mapping"); |
| 601 | 463 |
| 602 // Nop insertion | 464 // Nop insertion |
| 603 if (Ctx->getFlags().shouldDoNopInsertion()) { | 465 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 604 Func->doNopInsertion(); | 466 Func->doNopInsertion(); |
| 605 } | 467 } |
| 606 } | 468 } |
| 607 | 469 |
| 608 namespace { | |
| 609 | |
| 610 bool canRMW(const InstArithmetic *Arith) { | 470 bool canRMW(const InstArithmetic *Arith) { |
| 611 Type Ty = Arith->getDest()->getType(); | 471 Type Ty = Arith->getDest()->getType(); |
| 612 // X86 vector instructions write to a register and have no RMW | 472 // X86 vector instructions write to a register and have no RMW |
| 613 // option. | 473 // option. |
| 614 if (isVectorType(Ty)) | 474 if (isVectorType(Ty)) |
| 615 return false; | 475 return false; |
| 616 bool isI64 = Ty == IceType_i64; | 476 bool isI64 = Ty == IceType_i64; |
| 617 | 477 |
| 618 switch (Arith->getOp()) { | 478 switch (Arith->getOp()) { |
| 619 // Not handled for lack of simple lowering: | 479 // Not handled for lack of simple lowering: |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 645 return MemA->getBase() == MemB->getBase() && | 505 return MemA->getBase() == MemB->getBase() && |
| 646 MemA->getOffset() == MemB->getOffset() && | 506 MemA->getOffset() == MemB->getOffset() && |
| 647 MemA->getIndex() == MemB->getIndex() && | 507 MemA->getIndex() == MemB->getIndex() && |
| 648 MemA->getShift() == MemB->getShift() && | 508 MemA->getShift() == MemB->getShift() && |
| 649 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | 509 MemA->getSegmentRegister() == MemB->getSegmentRegister(); |
| 650 } | 510 } |
| 651 } | 511 } |
| 652 return false; | 512 return false; |
| 653 } | 513 } |
| 654 | 514 |
| 655 } // end of anonymous namespace | 515 template <class Machine> void TargetX86Base<Machine>::findRMW() { |
| 656 | |
| 657 void TargetX8632::findRMW() { | |
| 658 Func->dump("Before RMW"); | 516 Func->dump("Before RMW"); |
| 659 OstreamLocker L(Func->getContext()); | 517 OstreamLocker L(Func->getContext()); |
| 660 Ostream &Str = Func->getContext()->getStrDump(); | 518 Ostream &Str = Func->getContext()->getStrDump(); |
| 661 for (CfgNode *Node : Func->getNodes()) { | 519 for (CfgNode *Node : Func->getNodes()) { |
| 662 // Walk through the instructions, considering each sequence of 3 | 520 // Walk through the instructions, considering each sequence of 3 |
| 663 // instructions, and look for the particular RMW pattern. Note that this | 521 // instructions, and look for the particular RMW pattern. Note that this |
| 664 // search can be "broken" (false negatives) if there are intervening deleted | 522 // search can be "broken" (false negatives) if there are intervening deleted |
| 665 // instructions, or intervening instructions that could be safely moved out | 523 // instructions, or intervening instructions that could be safely moved out |
| 666 // of the way to reveal an RMW pattern. | 524 // of the way to reveal an RMW pattern. |
| 667 auto E = Node->getInsts().end(); | 525 auto E = Node->getInsts().end(); |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( | 597 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( |
| 740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | 598 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); |
| 741 Node->getInsts().insert(I3, RMW); | 599 Node->getInsts().insert(I3, RMW); |
| 742 } | 600 } |
| 743 } | 601 } |
| 744 } | 602 } |
| 745 } | 603 } |
| 746 } | 604 } |
| 747 } | 605 } |
| 748 | 606 |
| 749 namespace { | |
| 750 | |
| 751 // Converts a ConstantInteger32 operand into its constant value, or | 607 // Converts a ConstantInteger32 operand into its constant value, or |
| 752 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 608 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 753 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 609 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 610 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 755 return Integer->getValue(); | 611 return Integer->getValue(); |
| 756 return Intrinsics::MemoryOrderInvalid; | 612 return Intrinsics::MemoryOrderInvalid; |
| 757 } | 613 } |
| 758 | 614 |
| 759 // Determines whether the dest of a Load instruction can be folded | 615 // Determines whether the dest of a Load instruction can be folded |
| 760 // into one of the src operands of a 2-operand instruction. This is | 616 // into one of the src operands of a 2-operand instruction. This is |
| 761 // true as long as the load dest matches exactly one of the binary | 617 // true as long as the load dest matches exactly one of the binary |
| 762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 618 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| 763 // the answer is true. | 619 // the answer is true. |
| 764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 620 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 765 Operand *&Src0, Operand *&Src1) { | 621 Operand *&Src0, Operand *&Src1) { |
| 766 if (Src0 == LoadDest && Src1 != LoadDest) { | 622 if (Src0 == LoadDest && Src1 != LoadDest) { |
| 767 Src0 = LoadSrc; | 623 Src0 = LoadSrc; |
| 768 return true; | 624 return true; |
| 769 } | 625 } |
| 770 if (Src0 != LoadDest && Src1 == LoadDest) { | 626 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 771 Src1 = LoadSrc; | 627 Src1 = LoadSrc; |
| 772 return true; | 628 return true; |
| 773 } | 629 } |
| 774 return false; | 630 return false; |
| 775 } | 631 } |
| 776 | 632 |
| 777 } // end of anonymous namespace | 633 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| 778 | |
| 779 void TargetX8632::doLoadOpt() { | |
| 780 for (CfgNode *Node : Func->getNodes()) { | 634 for (CfgNode *Node : Func->getNodes()) { |
| 781 Context.init(Node); | 635 Context.init(Node); |
| 782 while (!Context.atEnd()) { | 636 while (!Context.atEnd()) { |
| 783 Variable *LoadDest = nullptr; | 637 Variable *LoadDest = nullptr; |
| 784 Operand *LoadSrc = nullptr; | 638 Operand *LoadSrc = nullptr; |
| 785 Inst *CurInst = Context.getCur(); | 639 Inst *CurInst = Context.getCur(); |
| 786 Inst *Next = Context.getNextInst(); | 640 Inst *Next = Context.getNextInst(); |
| 787 // Determine whether the current instruction is a Load | 641 // Determine whether the current instruction is a Load |
| 788 // instruction or equivalent. | 642 // instruction or equivalent. |
| 789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 643 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 859 NewInst->spliceLivenessInfo(Next, CurInst); | 713 NewInst->spliceLivenessInfo(Next, CurInst); |
| 860 } | 714 } |
| 861 } | 715 } |
| 862 Context.advanceCur(); | 716 Context.advanceCur(); |
| 863 Context.advanceNext(); | 717 Context.advanceNext(); |
| 864 } | 718 } |
| 865 } | 719 } |
| 866 Func->dump("After load optimization"); | 720 Func->dump("After load optimization"); |
| 867 } | 721 } |
| 868 | 722 |
| 869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 723 template <class Machine> |
| 724 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | |
| 870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 725 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
| 871 return Br->optimizeBranch(NextNode); | 726 return Br->optimizeBranch(NextNode); |
| 872 } | 727 } |
| 873 return false; | 728 return false; |
| 874 } | 729 } |
| 875 | 730 |
| 876 IceString TargetX8632::RegNames[] = { | 731 template <class Machine> |
| 732 IceString TargetX86Base<Machine>::RegNames[] = { | |
| 877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 733 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 878 frameptr, isI8, isInt, isFP) \ | 734 frameptr, isI8, isInt, isFP) \ |
| 879 name, | 735 name, |
| 880 REGX8632_TABLE | 736 REGX8632_TABLE |
| 881 #undef X | 737 #undef X |
| 882 }; | 738 }; |
| 883 | 739 |
| 884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) { | 740 template <class Machine> |
| 741 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | |
| 885 if (Ty == IceType_void) | 742 if (Ty == IceType_void) |
| 886 Ty = IceType_i32; | 743 Ty = IceType_i32; |
| 887 if (PhysicalRegisters[Ty].empty()) | 744 if (PhysicalRegisters[Ty].empty()) |
| 888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); | 745 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); |
| 889 assert(RegNum < PhysicalRegisters[Ty].size()); | 746 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 890 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 747 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 891 if (Reg == nullptr) { | 748 if (Reg == nullptr) { |
| 892 Reg = Func->makeVariable(Ty); | 749 Reg = Func->makeVariable(Ty); |
| 893 Reg->setRegNum(RegNum); | 750 Reg->setRegNum(RegNum); |
| 894 PhysicalRegisters[Ty][RegNum] = Reg; | 751 PhysicalRegisters[Ty][RegNum] = Reg; |
| 895 // Specially mark esp as an "argument" so that it is considered | 752 // Specially mark esp as an "argument" so that it is considered |
| 896 // live upon function entry. | 753 // live upon function entry. |
| 897 if (RegNum == RegX8632::Reg_esp) { | 754 if (RegNum == RegX8632::Reg_esp) { |
| 898 Func->addImplicitArg(Reg); | 755 Func->addImplicitArg(Reg); |
| 899 Reg->setIgnoreLiveness(); | 756 Reg->setIgnoreLiveness(); |
| 900 } | 757 } |
| 901 } | 758 } |
| 902 return Reg; | 759 return Reg; |
| 903 } | 760 } |
| 904 | 761 |
| 905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { | 762 template <class Machine> |
| 763 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | |
| 906 assert(RegNum < RegX8632::Reg_NUM); | 764 assert(RegNum < RegX8632::Reg_NUM); |
| 907 static IceString RegNames8[] = { | 765 static IceString RegNames8[] = { |
| 908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 766 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 909 frameptr, isI8, isInt, isFP) \ | 767 frameptr, isI8, isInt, isFP) \ |
| 910 name8, | 768 name8, |
| 911 REGX8632_TABLE | 769 REGX8632_TABLE |
| 912 #undef X | 770 #undef X |
| 913 }; | 771 }; |
| 914 static IceString RegNames16[] = { | 772 static IceString RegNames16[] = { |
| 915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 773 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 916 frameptr, isI8, isInt, isFP) \ | 774 frameptr, isI8, isInt, isFP) \ |
| 917 name16, | 775 name16, |
| 918 REGX8632_TABLE | 776 REGX8632_TABLE |
| 919 #undef X | 777 #undef X |
| 920 }; | 778 }; |
| 921 switch (Ty) { | 779 switch (Ty) { |
| 922 case IceType_i1: | 780 case IceType_i1: |
| 923 case IceType_i8: | 781 case IceType_i8: |
| 924 return RegNames8[RegNum]; | 782 return RegNames8[RegNum]; |
| 925 case IceType_i16: | 783 case IceType_i16: |
| 926 return RegNames16[RegNum]; | 784 return RegNames16[RegNum]; |
| 927 default: | 785 default: |
| 928 return RegNames[RegNum]; | 786 return RegNames[RegNum]; |
| 929 } | 787 } |
| 930 } | 788 } |
| 931 | 789 |
| 932 void TargetX8632::emitVariable(const Variable *Var) const { | 790 template <class Machine> |
| 791 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | |
| 933 Ostream &Str = Ctx->getStrEmit(); | 792 Ostream &Str = Ctx->getStrEmit(); |
| 934 if (Var->hasReg()) { | 793 if (Var->hasReg()) { |
| 935 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 794 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
| 936 return; | 795 return; |
| 937 } | 796 } |
| 938 if (Var->getWeight().isInf()) { | 797 if (Var->getWeight().isInf()) { |
| 939 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 798 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 940 } | 799 } |
| 941 int32_t Offset = Var->getStackOffset(); | 800 int32_t Offset = Var->getStackOffset(); |
| 942 if (!hasFramePointer()) | 801 if (!hasFramePointer()) |
| 943 Offset += getStackAdjustment(); | 802 Offset += getStackAdjustment(); |
| 944 if (Offset) | 803 if (Offset) |
| 945 Str << Offset; | 804 Str << Offset; |
| 946 const Type FrameSPTy = IceType_i32; | 805 const Type FrameSPTy = IceType_i32; |
| 947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; | 806 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; |
| 948 } | 807 } |
| 949 | 808 |
| 950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const { | 809 template <class Machine> |
| 810 X8632::Address | |
| 811 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | |
| 951 if (Var->hasReg()) | 812 if (Var->hasReg()) |
| 952 llvm_unreachable("Stack Variable has a register assigned"); | 813 llvm_unreachable("Stack Variable has a register assigned"); |
| 953 if (Var->getWeight().isInf()) { | 814 if (Var->getWeight().isInf()) { |
| 954 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 815 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 955 } | 816 } |
| 956 int32_t Offset = Var->getStackOffset(); | 817 int32_t Offset = Var->getStackOffset(); |
| 957 if (!hasFramePointer()) | 818 if (!hasFramePointer()) |
| 958 Offset += getStackAdjustment(); | 819 Offset += getStackAdjustment(); |
| 959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); | 820 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); |
| 960 } | 821 } |
| 961 | 822 |
| 962 void TargetX8632::lowerArguments() { | 823 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { |
| 963 VarList &Args = Func->getArgs(); | 824 VarList &Args = Func->getArgs(); |
| 964 // The first four arguments of vector type, regardless of their | 825 // The first four arguments of vector type, regardless of their |
| 965 // position relative to the other arguments in the argument list, are | 826 // position relative to the other arguments in the argument list, are |
| 966 // passed in registers xmm0 - xmm3. | 827 // passed in registers xmm0 - xmm3. |
| 967 unsigned NumXmmArgs = 0; | 828 unsigned NumXmmArgs = 0; |
| 968 | 829 |
| 969 Context.init(Func->getEntryNode()); | 830 Context.init(Func->getEntryNode()); |
| 970 Context.setInsertPoint(Context.getCur()); | 831 Context.setInsertPoint(Context.getCur()); |
| 971 | 832 |
| 972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; | 833 for (SizeT I = 0, E = Args.size(); |
| 973 ++I) { | 834 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { |
| 974 Variable *Arg = Args[I]; | 835 Variable *Arg = Args[I]; |
| 975 Type Ty = Arg->getType(); | 836 Type Ty = Arg->getType(); |
| 976 if (!isVectorType(Ty)) | 837 if (!isVectorType(Ty)) |
| 977 continue; | 838 continue; |
| 978 // Replace Arg in the argument list with the home register. Then | 839 // Replace Arg in the argument list with the home register. Then |
| 979 // generate an instruction in the prolog to copy the home register | 840 // generate an instruction in the prolog to copy the home register |
| 980 // to the assigned location of Arg. | 841 // to the assigned location of Arg. |
| 981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; | 842 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; |
| 982 ++NumXmmArgs; | 843 ++NumXmmArgs; |
| 983 Variable *RegisterArg = Func->makeVariable(Ty); | 844 Variable *RegisterArg = Func->makeVariable(Ty); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 994 | 855 |
| 995 // Helper function for addProlog(). | 856 // Helper function for addProlog(). |
| 996 // | 857 // |
| 997 // This assumes Arg is an argument passed on the stack. This sets the | 858 // This assumes Arg is an argument passed on the stack. This sets the |
| 998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 859 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 999 // width. For an I64 arg that has been split into Lo and Hi components, | 860 // width. For an I64 arg that has been split into Lo and Hi components, |
| 1000 // it calls itself recursively on the components, taking care to handle | 861 // it calls itself recursively on the components, taking care to handle |
| 1001 // Lo first because of the little-endian architecture. Lastly, this | 862 // Lo first because of the little-endian architecture. Lastly, this |
| 1002 // function generates an instruction to copy Arg into its assigned | 863 // function generates an instruction to copy Arg into its assigned |
| 1003 // register if applicable. | 864 // register if applicable. |
| 1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 865 template <class Machine> |
| 1005 size_t BasicFrameOffset, | 866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 1006 size_t &InArgsSizeBytes) { | 867 Variable *FramePtr, |
| 868 size_t BasicFrameOffset, | |
| 869 size_t &InArgsSizeBytes) { | |
| 1007 Variable *Lo = Arg->getLo(); | 870 Variable *Lo = Arg->getLo(); |
| 1008 Variable *Hi = Arg->getHi(); | 871 Variable *Hi = Arg->getHi(); |
| 1009 Type Ty = Arg->getType(); | 872 Type Ty = Arg->getType(); |
| 1010 if (Lo && Hi && Ty == IceType_i64) { | 873 if (Lo && Hi && Ty == IceType_i64) { |
| 1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 875 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 876 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 877 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1015 return; | 878 return; |
| 1016 } | 879 } |
| 1017 if (isVectorType(Ty)) { | 880 if (isVectorType(Ty)) { |
| 1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); | 881 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 1019 } | 882 } |
| 1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 883 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 884 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 1022 if (Arg->hasReg()) { | 885 if (Arg->hasReg()) { |
| 1023 assert(Ty != IceType_i64); | 886 assert(Ty != IceType_i64); |
| 1024 OperandX8632Mem *Mem = OperandX8632Mem::create( | 887 OperandX8632Mem *Mem = OperandX8632Mem::create( |
| 1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 888 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
| 1026 if (isVectorType(Arg->getType())) { | 889 if (isVectorType(Arg->getType())) { |
| 1027 _movp(Arg, Mem); | 890 _movp(Arg, Mem); |
| 1028 } else { | 891 } else { |
| 1029 _mov(Arg, Mem); | 892 _mov(Arg, Mem); |
| 1030 } | 893 } |
| 1031 // This argument-copying instruction uses an explicit | 894 // This argument-copying instruction uses an explicit |
| 1032 // OperandX8632Mem operand instead of a Variable, so its | 895 // OperandX8632Mem operand instead of a Variable, so its |
| 1033 // fill-from-stack operation has to be tracked separately for | 896 // fill-from-stack operation has to be tracked separately for |
| 1034 // statistics. | 897 // statistics. |
| 1035 Ctx->statsUpdateFills(); | 898 Ctx->statsUpdateFills(); |
| 1036 } | 899 } |
| 1037 } | 900 } |
| 1038 | 901 |
| 1039 Type TargetX8632::stackSlotType() { return IceType_i32; } | 902 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 903 return IceType_i32; | |
| 904 } | |
| 1040 | 905 |
| 1041 void TargetX8632::addProlog(CfgNode *Node) { | 906 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { |
| 1042 // Stack frame layout: | 907 // Stack frame layout: |
| 1043 // | 908 // |
| 1044 // +------------------------+ | 909 // +------------------------+ |
| 1045 // | 1. return address | | 910 // | 1. return address | |
| 1046 // +------------------------+ | 911 // +------------------------+ |
| 1047 // | 2. preserved registers | | 912 // | 2. preserved registers | |
| 1048 // +------------------------+ | 913 // +------------------------+ |
| 1049 // | 3. padding | | 914 // | 3. padding | |
| 1050 // +------------------------+ | 915 // +------------------------+ |
| 1051 // | 4. global spill area | | 916 // | 4. global spill area | |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1140 _mov(ebp, esp); | 1005 _mov(ebp, esp); |
| 1141 // Keep ebp live for late-stage liveness analysis | 1006 // Keep ebp live for late-stage liveness analysis |
| 1142 // (e.g. asm-verbose mode). | 1007 // (e.g. asm-verbose mode). |
| 1143 Context.insert(InstFakeUse::create(Func, ebp)); | 1008 Context.insert(InstFakeUse::create(Func, ebp)); |
| 1144 } | 1009 } |
| 1145 | 1010 |
| 1146 // Align the variables area. SpillAreaPaddingBytes is the size of | 1011 // Align the variables area. SpillAreaPaddingBytes is the size of |
| 1147 // the region after the preserved registers and before the spill areas. | 1012 // the region after the preserved registers and before the spill areas. |
| 1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 1013 // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| 1149 // and locals area if they are separate. | 1014 // and locals area if they are separate. |
| 1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); | 1015 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| 1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 1016 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 1152 uint32_t SpillAreaPaddingBytes = 0; | 1017 uint32_t SpillAreaPaddingBytes = 0; |
| 1153 uint32_t LocalsSlotsPaddingBytes = 0; | 1018 uint32_t LocalsSlotsPaddingBytes = 0; |
| 1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, | 1019 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| 1155 SpillAreaAlignmentBytes, GlobalsSize, | 1020 SpillAreaAlignmentBytes, GlobalsSize, |
| 1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, | 1021 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| 1157 &LocalsSlotsPaddingBytes); | 1022 &LocalsSlotsPaddingBytes); |
| 1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 1023 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 1159 uint32_t GlobalsAndSubsequentPaddingSize = | 1024 uint32_t GlobalsAndSubsequentPaddingSize = |
| 1160 GlobalsSize + LocalsSlotsPaddingBytes; | 1025 GlobalsSize + LocalsSlotsPaddingBytes; |
| 1161 | 1026 |
| 1162 // Align esp if necessary. | 1027 // Align esp if necessary. |
| 1163 if (NeedsStackAlignment) { | 1028 if (NeedsStackAlignment) { |
| 1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 1029 uint32_t StackOffset = |
| 1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 1030 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 1031 uint32_t StackSize = | |
| 1032 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | |
| 1166 SpillAreaSizeBytes = StackSize - StackOffset; | 1033 SpillAreaSizeBytes = StackSize - StackOffset; |
| 1167 } | 1034 } |
| 1168 | 1035 |
| 1169 // Generate "sub esp, SpillAreaSizeBytes" | 1036 // Generate "sub esp, SpillAreaSizeBytes" |
| 1170 if (SpillAreaSizeBytes) | 1037 if (SpillAreaSizeBytes) |
| 1171 _sub(getPhysicalRegister(RegX8632::Reg_esp), | 1038 _sub(getPhysicalRegister(RegX8632::Reg_esp), |
| 1172 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 1039 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1040 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 1174 | 1041 |
| 1175 resetStackAdjustment(); | 1042 resetStackAdjustment(); |
| 1176 | 1043 |
| 1177 // Fill in stack offsets for stack args, and copy args into registers | 1044 // Fill in stack offsets for stack args, and copy args into registers |
| 1178 // for those that were register-allocated. Args are pushed right to | 1045 // for those that were register-allocated. Args are pushed right to |
| 1179 // left, so Arg[0] is closest to the stack/frame pointer. | 1046 // left, so Arg[0] is closest to the stack/frame pointer. |
| 1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1047 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 1048 size_t BasicFrameOffset = |
| 1049 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | |
| 1182 if (!IsEbpBasedFrame) | 1050 if (!IsEbpBasedFrame) |
| 1183 BasicFrameOffset += SpillAreaSizeBytes; | 1051 BasicFrameOffset += SpillAreaSizeBytes; |
| 1184 | 1052 |
| 1185 const VarList &Args = Func->getArgs(); | 1053 const VarList &Args = Func->getArgs(); |
| 1186 size_t InArgsSizeBytes = 0; | 1054 size_t InArgsSizeBytes = 0; |
| 1187 unsigned NumXmmArgs = 0; | 1055 unsigned NumXmmArgs = 0; |
| 1188 for (Variable *Arg : Args) { | 1056 for (Variable *Arg : Args) { |
| 1189 // Skip arguments passed in registers. | 1057 // Skip arguments passed in registers. |
| 1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 1058 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| 1191 ++NumXmmArgs; | 1059 ++NumXmmArgs; |
| 1192 continue; | 1060 continue; |
| 1193 } | 1061 } |
| 1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 1062 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 1195 } | 1063 } |
| 1196 | 1064 |
| 1197 // Fill in stack offsets for locals. | 1065 // Fill in stack offsets for locals. |
| 1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | 1066 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| 1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | 1067 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| 1200 IsEbpBasedFrame); | 1068 IsEbpBasedFrame); |
| 1201 // Assign stack offsets to variables that have been linked to spilled | 1069 // Assign stack offsets to variables that have been linked to spilled |
| 1202 // variables. | 1070 // variables. |
| 1203 for (Variable *Var : VariablesLinkedToSpillSlots) { | 1071 for (Variable *Var : VariablesLinkedToSpillSlots) { |
| 1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); | 1072 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); |
| 1205 Var->setStackOffset(Linked->getStackOffset()); | 1073 Var->setStackOffset(Linked->getStackOffset()); |
| 1206 } | 1074 } |
| 1207 this->HasComputedFrame = true; | 1075 this->HasComputedFrame = true; |
| 1208 | 1076 |
| 1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { | 1077 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { |
| 1210 OstreamLocker L(Func->getContext()); | 1078 OstreamLocker L(Func->getContext()); |
| 1211 Ostream &Str = Func->getContext()->getStrDump(); | 1079 Ostream &Str = Func->getContext()->getStrDump(); |
| 1212 | 1080 |
| 1213 Str << "Stack layout:\n"; | 1081 Str << "Stack layout:\n"; |
| 1214 uint32_t EspAdjustmentPaddingSize = | 1082 uint32_t EspAdjustmentPaddingSize = |
| 1215 SpillAreaSizeBytes - LocalsSpillAreaSize - | 1083 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 1084 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| 1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 1085 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" | 1086 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 1087 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 1088 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 1221 << " globals spill area = " << GlobalsSize << " bytes\n" | 1089 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 1222 << " globals-locals spill areas intermediate padding = " | 1090 << " globals-locals spill areas intermediate padding = " |
| 1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 1091 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 1092 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 1225 << " esp alignment padding = " << EspAdjustmentPaddingSize | 1093 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 1226 << " bytes\n"; | 1094 << " bytes\n"; |
| 1227 | 1095 |
| 1228 Str << "Stack details:\n" | 1096 Str << "Stack details:\n" |
| 1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | 1097 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 1098 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 1099 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 1232 << " bytes\n" | 1100 << " bytes\n" |
| 1233 << " is ebp based = " << IsEbpBasedFrame << "\n"; | 1101 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 1234 } | 1102 } |
| 1235 } | 1103 } |
| 1236 | 1104 |
| 1237 void TargetX8632::addEpilog(CfgNode *Node) { | 1105 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) { |
| 1238 InstList &Insts = Node->getInsts(); | 1106 InstList &Insts = Node->getInsts(); |
| 1239 InstList::reverse_iterator RI, E; | 1107 InstList::reverse_iterator RI, E; |
| 1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 1108 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 1241 if (llvm::isa<InstX8632Ret>(*RI)) | 1109 if (llvm::isa<InstX8632Ret>(*RI)) |
| 1242 break; | 1110 break; |
| 1243 } | 1111 } |
| 1244 if (RI == E) | 1112 if (RI == E) |
| 1245 return; | 1113 return; |
| 1246 | 1114 |
| 1247 // Convert the reverse_iterator position into its corresponding | 1115 // Convert the reverse_iterator position into its corresponding |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1280 | 1148 |
| 1281 if (!Ctx->getFlags().getUseSandboxing()) | 1149 if (!Ctx->getFlags().getUseSandboxing()) |
| 1282 return; | 1150 return; |
| 1283 // Change the original ret instruction into a sandboxed return sequence. | 1151 // Change the original ret instruction into a sandboxed return sequence. |
| 1284 // t:ecx = pop | 1152 // t:ecx = pop |
| 1285 // bundle_lock | 1153 // bundle_lock |
| 1286 // and t, ~31 | 1154 // and t, ~31 |
| 1287 // jmp *t | 1155 // jmp *t |
| 1288 // bundle_unlock | 1156 // bundle_unlock |
| 1289 // FakeUse <original_ret_operand> | 1157 // FakeUse <original_ret_operand> |
| 1290 const SizeT BundleSize = 1 | 1158 const SizeT BundleSize = |
| 1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 1159 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 1160 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| 1293 _pop(T_ecx); | 1161 _pop(T_ecx); |
| 1294 _bundle_lock(); | 1162 _bundle_lock(); |
| 1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | 1163 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 1296 _jmp(T_ecx); | 1164 _jmp(T_ecx); |
| 1297 _bundle_unlock(); | 1165 _bundle_unlock(); |
| 1298 if (RI->getSrcSize()) { | 1166 if (RI->getSrcSize()) { |
| 1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1167 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1300 Context.insert(InstFakeUse::create(Func, RetValue)); | 1168 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1301 } | 1169 } |
| 1302 RI->setDeleted(); | 1170 RI->setDeleted(); |
| 1303 } | 1171 } |
| 1304 | 1172 |
| 1305 void TargetX8632::split64(Variable *Var) { | 1173 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1306 switch (Var->getType()) { | 1174 switch (Var->getType()) { |
| 1307 default: | 1175 default: |
| 1308 return; | 1176 return; |
| 1309 case IceType_i64: | 1177 case IceType_i64: |
| 1310 // TODO: Only consider F64 if we need to push each half when | 1178 // TODO: Only consider F64 if we need to push each half when |
| 1311 // passing as an argument to a function call. Note that each half | 1179 // passing as an argument to a function call. Note that each half |
| 1312 // is still typed as I32. | 1180 // is still typed as I32. |
| 1313 case IceType_f64: | 1181 case IceType_f64: |
| 1314 break; | 1182 break; |
| 1315 } | 1183 } |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 1326 Lo->setName(Func, Var->getName(Func) + "__lo"); | 1194 Lo->setName(Func, Var->getName(Func) + "__lo"); |
| 1327 Hi->setName(Func, Var->getName(Func) + "__hi"); | 1195 Hi->setName(Func, Var->getName(Func) + "__hi"); |
| 1328 } | 1196 } |
| 1329 Var->setLoHi(Lo, Hi); | 1197 Var->setLoHi(Lo, Hi); |
| 1330 if (Var->getIsArg()) { | 1198 if (Var->getIsArg()) { |
| 1331 Lo->setIsArg(); | 1199 Lo->setIsArg(); |
| 1332 Hi->setIsArg(); | 1200 Hi->setIsArg(); |
| 1333 } | 1201 } |
| 1334 } | 1202 } |
| 1335 | 1203 |
| 1336 Operand *TargetX8632::loOperand(Operand *Operand) { | 1204 template <class Machine> |
| 1205 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { | |
| 1337 assert(Operand->getType() == IceType_i64 || | 1206 assert(Operand->getType() == IceType_i64 || |
| 1338 Operand->getType() == IceType_f64); | 1207 Operand->getType() == IceType_f64); |
| 1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 1208 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 1340 return Operand; | 1209 return Operand; |
| 1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1210 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 1342 split64(Var); | 1211 split64(Var); |
| 1343 return Var->getLo(); | 1212 return Var->getLo(); |
| 1344 } | 1213 } |
| 1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1214 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 1215 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 1216 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 1348 return legalize(ConstInt); | 1217 return legalize(ConstInt); |
| 1349 } | 1218 } |
| 1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | 1219 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { |
| 1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create( | 1220 OperandX8632Mem *MemOperand = OperandX8632Mem::create( |
| 1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 1221 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 1353 Mem->getShift(), Mem->getSegmentRegister()); | 1222 Mem->getShift(), Mem->getSegmentRegister()); |
| 1354 // Test if we should randomize or pool the offset; if so, randomize it or | 1223 // Test if we should randomize or pool the offset; if so, randomize it or |
| 1355 // pool it, then create a mem operand with the blinded/pooled constant. | 1224 // pool it, then create a mem operand with the blinded/pooled constant. |
| 1356 // Otherwise, return the mem operand as an ordinary mem operand. | 1225 // Otherwise, return the mem operand as an ordinary mem operand. |
| 1357 return legalize(MemOperand); | 1226 return legalize(MemOperand); |
| 1358 } | 1227 } |
| 1359 llvm_unreachable("Unsupported operand type"); | 1228 llvm_unreachable("Unsupported operand type"); |
| 1360 return nullptr; | 1229 return nullptr; |
| 1361 } | 1230 } |
| 1362 | 1231 |
| 1363 Operand *TargetX8632::hiOperand(Operand *Operand) { | 1232 template <class Machine> |
| 1233 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { | |
| 1364 assert(Operand->getType() == IceType_i64 || | 1234 assert(Operand->getType() == IceType_i64 || |
| 1365 Operand->getType() == IceType_f64); | 1235 Operand->getType() == IceType_f64); |
| 1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 1236 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 1367 return Operand; | 1237 return Operand; |
| 1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1238 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 1369 split64(Var); | 1239 split64(Var); |
| 1370 return Var->getHi(); | 1240 return Var->getHi(); |
| 1371 } | 1241 } |
| 1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1242 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 1243 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| (...skipping 20 matching lines...) Expand all Loading... | |
| 1394 Mem->getShift(), Mem->getSegmentRegister()); | 1264 Mem->getShift(), Mem->getSegmentRegister()); |
| 1395 // Test if the Offset is an eligible i32 constant for randomization and | 1265 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem |
| 1396 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem | 1266 // operand. |
| 1397 // operand. | 1267 // operand. |
| 1398 return legalize(MemOperand); | 1268 return legalize(MemOperand); |
| 1399 } | 1269 } |
| 1400 llvm_unreachable("Unsupported operand type"); | 1270 llvm_unreachable("Unsupported operand type"); |
| 1401 return nullptr; | 1271 return nullptr; |
| 1402 } | 1272 } |
| 1403 | 1273 |
| 1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | 1274 template <class Machine> |
| 1405 RegSetMask Exclude) const { | 1275 llvm::SmallBitVector |
| 1276 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | |
| 1277 RegSetMask Exclude) const { | |
| 1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); | 1278 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); |
| 1407 | 1279 |
| 1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 1280 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 1409 frameptr, isI8, isInt, isFP) \ | 1281 frameptr, isI8, isInt, isFP) \ |
| 1410 if (scratch && (Include & RegSet_CallerSave)) \ | 1282 if (scratch && (Include & RegSet_CallerSave)) \ |
| 1411 Registers[RegX8632::val] = true; \ | 1283 Registers[RegX8632::val] = true; \ |
| 1412 if (preserved && (Include & RegSet_CalleeSave)) \ | 1284 if (preserved && (Include & RegSet_CalleeSave)) \ |
| 1413 Registers[RegX8632::val] = true; \ | 1285 Registers[RegX8632::val] = true; \ |
| 1414 if (stackptr && (Include & RegSet_StackPointer)) \ | 1286 if (stackptr && (Include & RegSet_StackPointer)) \ |
| 1415 Registers[RegX8632::val] = true; \ | 1287 Registers[RegX8632::val] = true; \ |
| 1416 if (frameptr && (Include & RegSet_FramePointer)) \ | 1288 if (frameptr && (Include & RegSet_FramePointer)) \ |
| 1417 Registers[RegX8632::val] = true; \ | 1289 Registers[RegX8632::val] = true; \ |
| 1418 if (scratch && (Exclude & RegSet_CallerSave)) \ | 1290 if (scratch && (Exclude & RegSet_CallerSave)) \ |
| 1419 Registers[RegX8632::val] = false; \ | 1291 Registers[RegX8632::val] = false; \ |
| 1420 if (preserved && (Exclude & RegSet_CalleeSave)) \ | 1292 if (preserved && (Exclude & RegSet_CalleeSave)) \ |
| 1421 Registers[RegX8632::val] = false; \ | 1293 Registers[RegX8632::val] = false; \ |
| 1422 if (stackptr && (Exclude & RegSet_StackPointer)) \ | 1294 if (stackptr && (Exclude & RegSet_StackPointer)) \ |
| 1423 Registers[RegX8632::val] = false; \ | 1295 Registers[RegX8632::val] = false; \ |
| 1424 if (frameptr && (Exclude & RegSet_FramePointer)) \ | 1296 if (frameptr && (Exclude & RegSet_FramePointer)) \ |
| 1425 Registers[RegX8632::val] = false; | 1297 Registers[RegX8632::val] = false; |
| 1426 | 1298 |
| 1427 REGX8632_TABLE | 1299 REGX8632_TABLE |
| 1428 | 1300 |
| 1429 #undef X | 1301 #undef X |
| 1430 | 1302 |
| 1431 return Registers; | 1303 return Registers; |
| 1432 } | 1304 } |
| 1433 | 1305 |
| 1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 1306 template <class Machine> |
| 1307 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | |
| 1435 IsEbpBasedFrame = true; | 1308 IsEbpBasedFrame = true; |
| 1436 // Conservatively require the stack to be aligned. Some stack | 1309 // Conservatively require the stack to be aligned. Some stack |
| 1437 // adjustment operations implemented below assume that the stack is | 1310 // adjustment operations implemented below assume that the stack is |
| 1438 // aligned before the alloca. All the alloca code ensures that the | 1311 // aligned before the alloca. All the alloca code ensures that the |
| 1439 // stack alignment is preserved after the alloca. The stack alignment | 1312 // stack alignment is preserved after the alloca. The stack alignment |
| 1440 // restriction can be relaxed in some cases. | 1313 // restriction can be relaxed in some cases. |
| 1441 NeedsStackAlignment = true; | 1314 NeedsStackAlignment = true; |
| 1442 | 1315 |
| 1443 // TODO(stichnot): minimize the number of adjustments of esp, etc. | 1316 // TODO(stichnot): minimize the number of adjustments of esp, etc. |
| 1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 1317 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
| 1445 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1318 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 1446 Variable *Dest = Inst->getDest(); | 1319 Variable *Dest = Inst->getDest(); |
| 1447 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1320 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
| 1448 // For default align=0, set it to the real value 1, to avoid any | 1321 // For default align=0, set it to the real value 1, to avoid any |
| 1449 // bit-manipulation problems below. | 1322 // bit-manipulation problems below. |
| 1450 AlignmentParam = std::max(AlignmentParam, 1u); | 1323 AlignmentParam = std::max(AlignmentParam, 1u); |
| 1451 | 1324 |
| 1452 // LLVM enforces power of 2 alignment. | 1325 // LLVM enforces power of 2 alignment. |
| 1453 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1326 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES)); | 1327 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| 1455 | 1328 |
| 1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | 1329 uint32_t Alignment = |
| 1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | 1330 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 1331 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { | |
| 1458 _and(esp, Ctx->getConstantInt32(-Alignment)); | 1332 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| 1459 } | 1333 } |
| 1460 if (const auto *ConstantTotalSize = | 1334 if (const auto *ConstantTotalSize = |
| 1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1335 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1462 uint32_t Value = ConstantTotalSize->getValue(); | 1336 uint32_t Value = ConstantTotalSize->getValue(); |
| 1463 Value = Utils::applyAlignment(Value, Alignment); | 1337 Value = Utils::applyAlignment(Value, Alignment); |
| 1464 _sub(esp, Ctx->getConstantInt32(Value)); | 1338 _sub(esp, Ctx->getConstantInt32(Value)); |
| 1465 } else { | 1339 } else { |
| 1466 // Non-constant sizes need to be adjusted to the next highest | 1340 // Non-constant sizes need to be adjusted to the next highest |
| 1467 // multiple of the required alignment at runtime. | 1341 // multiple of the required alignment at runtime. |
| 1468 Variable *T = makeReg(IceType_i32); | 1342 Variable *T = makeReg(IceType_i32); |
| 1469 _mov(T, TotalSize); | 1343 _mov(T, TotalSize); |
| 1470 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1344 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1471 _and(T, Ctx->getConstantInt32(-Alignment)); | 1345 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1472 _sub(esp, T); | 1346 _sub(esp, T); |
| 1473 } | 1347 } |
| 1474 _mov(Dest, esp); | 1348 _mov(Dest, esp); |
| 1475 } | 1349 } |
| 1476 | 1350 |
| 1477 // Strength-reduce scalar integer multiplication by a constant (for | 1351 // Strength-reduce scalar integer multiplication by a constant (for |
| 1478 // i32 or narrower) for certain constants. The lea instruction can be | 1352 // used to multiply by 3, 5, or 9, and the shl instruction can be used |
| 1479 // used to multiply by 3, 5, or 9, and the shl instruction can be used | 1353 // to multiply by powers of 2. These can be combined such that |
| 1480 // to multiply by powers of 2. These can be combined such that | 1354 // to multiply by powers of 2. These can be combined such that |
| 1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 1355 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1482 // combined with left-shifting by 2. | 1356 // combined with left-shifting by 2. |
| 1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1357 template <class Machine> |
| 1484 int32_t Src1) { | 1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1359 int32_t Src1) { | |
| 1485 // Disable this optimization for Om1 and O0, just to keep things | 1360 // Disable this optimization for Om1 and O0, just to keep things |
| 1486 // simple there. | 1361 // simple there. |
| 1487 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1362 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1488 return false; | 1363 return false; |
| 1489 Type Ty = Dest->getType(); | 1364 Type Ty = Dest->getType(); |
| 1490 Variable *T = nullptr; | 1365 Variable *T = nullptr; |
| 1491 if (Src1 == -1) { | 1366 if (Src1 == -1) { |
| 1492 _mov(T, Src0); | 1367 _mov(T, Src0); |
| 1493 _neg(T); | 1368 _neg(T); |
| 1494 _mov(Dest, T); | 1369 _mov(Dest, T); |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1563 } | 1438 } |
| 1564 if (Count2) { | 1439 if (Count2) { |
| 1565 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1440 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1566 } | 1441 } |
| 1567 if (Src1IsNegative) | 1442 if (Src1IsNegative) |
| 1568 _neg(T); | 1443 _neg(T); |
| 1569 _mov(Dest, T); | 1444 _mov(Dest, T); |
| 1570 return true; | 1445 return true; |
| 1571 } | 1446 } |
| 1572 | 1447 |
| 1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1448 template <class Machine> |
| 1449 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | |
| 1574 Variable *Dest = Inst->getDest(); | 1450 Variable *Dest = Inst->getDest(); |
| 1575 Operand *Src0 = legalize(Inst->getSrc(0)); | 1451 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1576 Operand *Src1 = legalize(Inst->getSrc(1)); | 1452 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1577 if (Inst->isCommutative()) { | 1453 if (Inst->isCommutative()) { |
| 1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1454 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
| 1579 std::swap(Src0, Src1); | 1455 std::swap(Src0, Src1); |
| 1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | 1456 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
| 1581 std::swap(Src0, Src1); | 1457 std::swap(Src0, Src1); |
| 1582 } | 1458 } |
| 1583 if (Dest->getType() == IceType_i64) { | 1459 if (Dest->getType() == IceType_i64) { |
| (...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1866 case InstArithmetic::Sub: { | 1742 case InstArithmetic::Sub: { |
| 1867 Variable *T = makeReg(Dest->getType()); | 1743 Variable *T = makeReg(Dest->getType()); |
| 1868 _movp(T, Src0); | 1744 _movp(T, Src0); |
| 1869 _psub(T, Src1); | 1745 _psub(T, Src1); |
| 1870 _movp(Dest, T); | 1746 _movp(Dest, T); |
| 1871 } break; | 1747 } break; |
| 1872 case InstArithmetic::Mul: { | 1748 case InstArithmetic::Mul: { |
| 1873 bool TypesAreValidForPmull = | 1749 bool TypesAreValidForPmull = |
| 1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1750 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
| 1875 bool InstructionSetIsValidForPmull = | 1751 bool InstructionSetIsValidForPmull = |
| 1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; | 1752 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1; |
| 1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1753 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1878 Variable *T = makeReg(Dest->getType()); | 1754 Variable *T = makeReg(Dest->getType()); |
| 1879 _movp(T, Src0); | 1755 _movp(T, Src0); |
| 1880 _pmull(T, Src1); | 1756 _pmull(T, Src1); |
| 1881 _movp(Dest, T); | 1757 _movp(Dest, T); |
| 1882 } else if (Dest->getType() == IceType_v4i32) { | 1758 } else if (Dest->getType() == IceType_v4i32) { |
| 1883 // Lowering sequence: | 1759 // Lowering sequence: |
| 1884 // Note: The mask arguments have index 0 on the left. | 1760 // Note: The mask arguments have index 0 on the left. |
| 1885 // | 1761 // |
| 1886 // movups T1, Src0 | 1762 // movups T1, Src0 |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1936 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 2061 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1937 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 2062 Type Ty = Dest->getType(); | 1938 Type Ty = Dest->getType(); |
| 2063 // LLVM does the following for dest=src/(1<<log): | 1939 // LLVM does the following for dest=src/(1<<log): |
| 2064 // t=src | 1940 // t=src |
| 2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1941 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 2066 // shr t,typewidth-log | 1942 // shr t,typewidth-log |
| 2067 // add t,src | 1943 // add t,src |
| 2068 // sar t,log | 1944 // sar t,log |
| 2069 // dest=t | 1945 // dest=t |
| 2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | 1946 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 2071 _mov(T, Src0); | 1947 _mov(T, Src0); |
| 2072 // If for some reason we are dividing by 1, just treat it | 1948 // If for some reason we are dividing by 1, just treat it |
| 2073 // like an assignment. | 1949 // like an assignment. |
| 2074 if (LogDiv > 0) { | 1950 if (LogDiv > 0) { |
| 2075 // The initial sar is unnecessary when dividing by 2. | 1951 // The initial sar is unnecessary when dividing by 2. |
| 2076 if (LogDiv > 1) | 1952 if (LogDiv > 1) |
| 2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 1953 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1954 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 2079 _add(T, Src0); | 1955 _add(T, Src0); |
| 2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1956 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2129 Type Ty = Dest->getType(); | 2005 Type Ty = Dest->getType(); |
| 2130 // LLVM does the following for dest=src%(1<<log): | 2006 // LLVM does the following for dest=src%(1<<log): |
| 2131 // t=src | 2007 // t=src |
| 2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 2008 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 2133 // shr t,typewidth-log | 2009 // shr t,typewidth-log |
| 2134 // add t,src | 2010 // add t,src |
| 2135 // and t, -(1<<log) | 2011 // and t, -(1<<log) |
| 2136 // sub t,src | 2012 // sub t,src |
| 2137 // neg t | 2013 // neg t |
| 2138 // dest=t | 2014 // dest=t |
| 2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | 2015 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 2140 // If for some reason we are dividing by 1, just assign 0. | 2016 // If for some reason we are dividing by 1, just assign 0. |
| 2141 if (LogDiv == 0) { | 2017 if (LogDiv == 0) { |
| 2142 _mov(Dest, Ctx->getConstantZero(Ty)); | 2018 _mov(Dest, Ctx->getConstantZero(Ty)); |
| 2143 return; | 2019 return; |
| 2144 } | 2020 } |
| 2145 _mov(T, Src0); | 2021 _mov(T, Src0); |
| 2146 // The initial sar is unnecessary when dividing by 2. | 2022 // The initial sar is unnecessary when dividing by 2. |
| 2147 if (LogDiv > 1) | 2023 if (LogDiv > 1) |
| 2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 2024 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 2025 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2197 Type Ty = Dest->getType(); | 2073 Type Ty = Dest->getType(); |
| 2198 InstCall *Call = makeHelperCall( | 2074 InstCall *Call = makeHelperCall( |
| 2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 2075 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
| 2200 Call->addArg(Src0); | 2076 Call->addArg(Src0); |
| 2201 Call->addArg(Src1); | 2077 Call->addArg(Src1); |
| 2202 return lowerCall(Call); | 2078 return lowerCall(Call); |
| 2203 } | 2079 } |
| 2204 } | 2080 } |
| 2205 } | 2081 } |
| 2206 | 2082 |
| 2207 void TargetX8632::lowerAssign(const InstAssign *Inst) { | 2083 template <class Machine> |
| 2084 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | |
| 2208 Variable *Dest = Inst->getDest(); | 2085 Variable *Dest = Inst->getDest(); |
| 2209 Operand *Src0 = Inst->getSrc(0); | 2086 Operand *Src0 = Inst->getSrc(0); |
| 2210 assert(Dest->getType() == Src0->getType()); | 2087 assert(Dest->getType() == Src0->getType()); |
| 2211 if (Dest->getType() == IceType_i64) { | 2088 if (Dest->getType() == IceType_i64) { |
| 2212 Src0 = legalize(Src0); | 2089 Src0 = legalize(Src0); |
| 2213 Operand *Src0Lo = loOperand(Src0); | 2090 Operand *Src0Lo = loOperand(Src0); |
| 2214 Operand *Src0Hi = hiOperand(Src0); | 2091 Operand *Src0Hi = hiOperand(Src0); |
| 2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2217 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2094 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 2242 // register or a scalar integer immediate. | 2119 // register or a scalar integer immediate. |
| 2243 RI = legalize(Src0, Legal_Reg | Legal_Imm); | 2120 RI = legalize(Src0, Legal_Reg | Legal_Imm); |
| 2244 } | 2121 } |
| 2245 if (isVectorType(Dest->getType())) | 2122 if (isVectorType(Dest->getType())) |
| 2246 _movp(Dest, RI); | 2123 _movp(Dest, RI); |
| 2247 else | 2124 else |
| 2248 _mov(Dest, RI); | 2125 _mov(Dest, RI); |
| 2249 } | 2126 } |
| 2250 } | 2127 } |
| 2251 | 2128 |
| 2252 void TargetX8632::lowerBr(const InstBr *Inst) { | 2129 template <class Machine> |
| 2130 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { | |
| 2253 if (Inst->isUnconditional()) { | 2131 if (Inst->isUnconditional()) { |
| 2254 _br(Inst->getTargetUnconditional()); | 2132 _br(Inst->getTargetUnconditional()); |
| 2255 return; | 2133 return; |
| 2256 } | 2134 } |
| 2257 Operand *Cond = Inst->getCondition(); | 2135 Operand *Cond = Inst->getCondition(); |
| 2258 | 2136 |
| 2259 // Handle folding opportunities. | 2137 // Handle folding opportunities. |
| 2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 2138 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| 2261 assert(Producer->isDeleted()); | 2139 assert(Producer->isDeleted()); |
| 2262 switch (BoolFolding::getProducerKind(Producer)) { | 2140 switch (BoolFolding::getProducerKind(Producer)) { |
| 2263 default: | 2141 default: |
| 2264 break; | 2142 break; |
| 2265 case BoolFolding::PK_Icmp32: { | 2143 case BoolFolding::PK_Icmp32: { |
| 2266 // TODO(stichnot): Refactor similarities between this block and | 2144 // TODO(stichnot): Refactor similarities between this block and |
| 2267 // the corresponding code in lowerIcmp(). | 2145 // the corresponding code in lowerIcmp(). |
| 2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 2146 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| 2269 Operand *Src0 = Producer->getSrc(0); | 2147 Operand *Src0 = Producer->getSrc(0); |
| 2270 Operand *Src1 = legalize(Producer->getSrc(1)); | 2148 Operand *Src1 = legalize(Producer->getSrc(1)); |
| 2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2149 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2272 _cmp(Src0RM, Src1); | 2150 _cmp(Src0RM, Src1); |
| 2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), | 2151 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
| 2274 Inst->getTargetFalse()); | 2152 Inst->getTargetFalse()); |
| 2275 return; | 2153 return; |
| 2276 } | 2154 } |
| 2277 } | 2155 } |
| 2278 } | 2156 } |
| 2279 | 2157 |
| 2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2158 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 2281 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2159 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2282 _cmp(Src0, Zero); | 2160 _cmp(Src0, Zero); |
| 2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 2161 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 2284 } | 2162 } |
| 2285 | 2163 |
| 2286 void TargetX8632::lowerCall(const InstCall *Instr) { | 2164 template <class Machine> |
| 2165 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { | |
| 2287 // x86-32 calling convention: | 2166 // x86-32 calling convention: |
| 2288 // | 2167 // |
| 2289 // * At the point before the call, the stack must be aligned to 16 | 2168 // * At the point before the call, the stack must be aligned to 16 |
| 2290 // bytes. | 2169 // bytes. |
| 2291 // | 2170 // |
| 2292 // * The first four arguments of vector type, regardless of their | 2171 // * The first four arguments of vector type, regardless of their |
| 2293 // position relative to the other arguments in the argument list, are | 2172 // position relative to the other arguments in the argument list, are |
| 2294 // placed in registers xmm0 - xmm3. | 2173 // placed in registers xmm0 - xmm3. |
| 2295 // | 2174 // |
| 2296 // * Other arguments are pushed onto the stack in right-to-left order, | 2175 // * Other arguments are pushed onto the stack in right-to-left order, |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 2311 OperandList StackArgs, StackArgLocations; | 2190 OperandList StackArgs, StackArgLocations; |
| 2312 uint32_t ParameterAreaSizeBytes = 0; | 2191 uint32_t ParameterAreaSizeBytes = 0; |
| 2313 | 2192 |
| 2314 // Classify each argument operand according to the location where the | 2193 // Classify each argument operand according to the location where the |
| 2315 // argument is passed. | 2194 // argument is passed. |
| 2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 2195 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 2317 Operand *Arg = Instr->getArg(i); | 2196 Operand *Arg = Instr->getArg(i); |
| 2318 Type Ty = Arg->getType(); | 2197 Type Ty = Arg->getType(); |
| 2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 2198 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 2320 assert(typeWidthInBytes(Ty) >= 4); | 2199 assert(typeWidthInBytes(Ty) >= 4); |
| 2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 2200 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 2322 XmmArgs.push_back(Arg); | 2201 XmmArgs.push_back(Arg); |
| 2323 } else { | 2202 } else { |
| 2324 StackArgs.push_back(Arg); | 2203 StackArgs.push_back(Arg); |
| 2325 if (isVectorType(Arg->getType())) { | 2204 if (isVectorType(Arg->getType())) { |
| 2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 2205 ParameterAreaSizeBytes = |
| 2206 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 2327 } | 2207 } |
| 2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 2208 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
| 2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | 2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
| 2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 2332 } | 2212 } |
| 2333 } | 2213 } |
| 2334 | 2214 |
| 2335 // Adjust the parameter area so that the stack is aligned. It is | 2215 // Adjust the parameter area so that the stack is aligned. It is |
| 2336 // assumed that the stack is already aligned at the start of the | 2216 // assumed that the stack is already aligned at the start of the |
| 2337 // calling sequence. | 2217 // calling sequence. |
| 2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 2339 | 2219 |
| 2340 // Subtract the appropriate amount for the argument area. This also | 2220 // Subtract the appropriate amount for the argument area. This also |
| 2341 // takes care of setting the stack adjustment during emission. | 2221 // takes care of setting the stack adjustment during emission. |
| 2342 // | 2222 // |
| 2343 // TODO: If for some reason the call instruction gets dead-code | 2223 // TODO: If for some reason the call instruction gets dead-code |
| 2344 // eliminated after lowering, we would need to ensure that the | 2224 // eliminated after lowering, we would need to ensure that the |
| 2345 // pre-call and the post-call esp adjustment get eliminated as well. | 2225 // pre-call and the post-call esp adjustment get eliminated as well. |
| 2346 if (ParameterAreaSizeBytes) { | 2226 if (ParameterAreaSizeBytes) { |
| 2347 _adjust_stack(ParameterAreaSizeBytes); | 2227 _adjust_stack(ParameterAreaSizeBytes); |
| 2348 } | 2228 } |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2411 Operand *CallTarget = legalize(Instr->getCallTarget()); | 2291 Operand *CallTarget = legalize(Instr->getCallTarget()); |
| 2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 2292 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 2413 if (NeedSandboxing) { | 2293 if (NeedSandboxing) { |
| 2414 if (llvm::isa<Constant>(CallTarget)) { | 2294 if (llvm::isa<Constant>(CallTarget)) { |
| 2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2295 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2416 } else { | 2296 } else { |
| 2417 Variable *CallTargetVar = nullptr; | 2297 Variable *CallTargetVar = nullptr; |
| 2418 _mov(CallTargetVar, CallTarget); | 2298 _mov(CallTargetVar, CallTarget); |
| 2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2299 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2420 const SizeT BundleSize = | 2300 const SizeT BundleSize = |
| 2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 2301 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | 2302 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 2423 CallTarget = CallTargetVar; | 2303 CallTarget = CallTargetVar; |
| 2424 } | 2304 } |
| 2425 } | 2305 } |
| 2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 2306 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
| 2427 Context.insert(NewCall); | 2307 Context.insert(NewCall); |
| 2428 if (NeedSandboxing) | 2308 if (NeedSandboxing) |
| 2429 _bundle_unlock(); | 2309 _bundle_unlock(); |
| 2430 if (ReturnRegHi) | 2310 if (ReturnRegHi) |
| 2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 2311 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2473 // st(0). | 2353 // st(0). |
| 2474 // If Dest ends up being a physical xmm register, the fstp emit code | 2354 // If Dest ends up being a physical xmm register, the fstp emit code |
| 2475 // will route st(0) through a temporary stack slot. | 2355 // will route st(0) through a temporary stack slot. |
| 2476 _fstp(Dest); | 2356 _fstp(Dest); |
| 2477 // Create a fake use of Dest in case it actually isn't used, | 2357 // Create a fake use of Dest in case it actually isn't used, |
| 2478 // because st(0) still needs to be popped. | 2358 // because st(0) still needs to be popped. |
| 2479 Context.insert(InstFakeUse::create(Func, Dest)); | 2359 Context.insert(InstFakeUse::create(Func, Dest)); |
| 2480 } | 2360 } |
| 2481 } | 2361 } |
| 2482 | 2362 |
| 2483 void TargetX8632::lowerCast(const InstCast *Inst) { | 2363 template <class Machine> |
| 2364 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | |
| 2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2365 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 2485 InstCast::OpKind CastKind = Inst->getCastKind(); | 2366 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 2486 Variable *Dest = Inst->getDest(); | 2367 Variable *Dest = Inst->getDest(); |
| 2487 switch (CastKind) { | 2368 switch (CastKind) { |
| 2488 default: | 2369 default: |
| 2489 Func->setError("Cast type not supported"); | 2370 Func->setError("Cast type not supported"); |
| 2490 return; | 2371 return; |
| 2491 case InstCast::Sext: { | 2372 case InstCast::Sext: { |
| 2492 // Src0RM is the source operand legalized to physical register or memory, | 2373 // Src0RM is the source operand legalized to physical register or memory, |
| 2493 // but not immediate, since the relevant x86 native instructions don't | 2374 // but not immediate, since the relevant x86 native instructions don't |
| 2494 // allow an immediate operand. If the operand is an immediate, we could | 2375 // allow an immediate operand. If the operand is an immediate, we could |
| 2495 // consider computing the strength-reduced result at translation time, | 2376 // consider computing the strength-reduced result at translation time, |
| 2496 // but we're unlikely to see something like that in the bitcode that | 2377 // but we're unlikely to see something like that in the bitcode that |
| 2497 // the optimizer wouldn't have already taken care of. | 2378 // the optimizer wouldn't have already taken care of. |
| 2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2379 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2499 if (isVectorType(Dest->getType())) { | 2380 if (isVectorType(Dest->getType())) { |
| 2500 Type DestTy = Dest->getType(); | 2381 Type DestTy = Dest->getType(); |
| 2501 if (DestTy == IceType_v16i8) { | 2382 if (DestTy == IceType_v16i8) { |
| 2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 2383 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2503 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 2384 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
| 2504 Variable *T = makeReg(DestTy); | 2385 Variable *T = makeReg(DestTy); |
| 2505 _movp(T, Src0RM); | 2386 _movp(T, Src0RM); |
| 2506 _pand(T, OneMask); | 2387 _pand(T, OneMask); |
| 2507 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 2388 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
| 2508 _pcmpgt(T, Zeros); | 2389 _pcmpgt(T, Zeros); |
| 2509 _movp(Dest, T); | 2390 _movp(Dest, T); |
| 2510 } else { | 2391 } else { |
| 2511 // width = width(elty) - 1; dest = (src << width) >> width | 2392 // width = width(elty) - 1; dest = (src << width) >> width |
| 2512 SizeT ShiftAmount = | 2393 SizeT ShiftAmount = |
| 2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; | 2394 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2395 1; | |
| 2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 2396 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2515 Variable *T = makeReg(DestTy); | 2397 Variable *T = makeReg(DestTy); |
| 2516 _movp(T, Src0RM); | 2398 _movp(T, Src0RM); |
| 2517 _psll(T, ShiftConstant); | 2399 _psll(T, ShiftConstant); |
| 2518 _psra(T, ShiftConstant); | 2400 _psra(T, ShiftConstant); |
| 2519 _movp(Dest, T); | 2401 _movp(Dest, T); |
| 2520 } | 2402 } |
| 2521 } else if (Dest->getType() == IceType_i64) { | 2403 } else if (Dest->getType() == IceType_i64) { |
| 2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 2404 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
| 2523 Constant *Shift = Ctx->getConstantInt32(31); | 2405 Constant *Shift = Ctx->getConstantInt32(31); |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 2538 _mov(T_Hi, T_Lo); | 2420 _mov(T_Hi, T_Lo); |
| 2539 if (Src0RM->getType() != IceType_i1) | 2421 if (Src0RM->getType() != IceType_i1) |
| 2540 // For i1, the sar instruction is already done above. | 2422 // For i1, the sar instruction is already done above. |
| 2541 _sar(T_Hi, Shift); | 2423 _sar(T_Hi, Shift); |
| 2542 _mov(DestHi, T_Hi); | 2424 _mov(DestHi, T_Hi); |
| 2543 } else if (Src0RM->getType() == IceType_i1) { | 2425 } else if (Src0RM->getType() == IceType_i1) { |
| 2544 // t1 = src | 2426 // t1 = src |
| 2545 // shl t1, dst_bitwidth - 1 | 2427 // shl t1, dst_bitwidth - 1 |
| 2546 // sar t1, dst_bitwidth - 1 | 2428 // sar t1, dst_bitwidth - 1 |
| 2547 // dst = t1 | 2429 // dst = t1 |
| 2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | 2430 size_t DestBits = |
| 2431 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | |
| 2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); | 2432 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); |
| 2550 Variable *T = makeReg(Dest->getType()); | 2433 Variable *T = makeReg(Dest->getType()); |
| 2551 if (typeWidthInBytes(Dest->getType()) <= | 2434 if (typeWidthInBytes(Dest->getType()) <= |
| 2552 typeWidthInBytes(Src0RM->getType())) { | 2435 typeWidthInBytes(Src0RM->getType())) { |
| 2553 _mov(T, Src0RM); | 2436 _mov(T, Src0RM); |
| 2554 } else { | 2437 } else { |
| 2555 // Widen the source using movsx or movzx. (It doesn't matter | 2438 // Widen the source using movsx or movzx. (It doesn't matter |
| 2556 // which one, since the following shl/sar overwrite the bits.) | 2439 // which one, since the following shl/sar overwrite the bits.) |
| 2557 _movzx(T, Src0RM); | 2440 _movzx(T, Src0RM); |
| 2558 } | 2441 } |
| (...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2943 case IceType_v4i32: | 2826 case IceType_v4i32: |
| 2944 case IceType_v4f32: { | 2827 case IceType_v4f32: { |
| 2945 _movp(Dest, legalizeToVar(Src0)); | 2828 _movp(Dest, legalizeToVar(Src0)); |
| 2946 } break; | 2829 } break; |
| 2947 } | 2830 } |
| 2948 break; | 2831 break; |
| 2949 } | 2832 } |
| 2950 } | 2833 } |
| 2951 } | 2834 } |
| 2952 | 2835 |
| 2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | 2836 template <class Machine> |
| 2837 void TargetX86Base<Machine>::lowerExtractElement( | |
| 2838 const InstExtractElement *Inst) { | |
| 2954 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2839 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2955 ConstantInteger32 *ElementIndex = | 2840 ConstantInteger32 *ElementIndex = |
| 2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2841 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
| 2957 // Only constant indices are allowed in PNaCl IR. | 2842 // Only constant indices are allowed in PNaCl IR. |
| 2958 assert(ElementIndex); | 2843 assert(ElementIndex); |
| 2959 | 2844 |
| 2960 unsigned Index = ElementIndex->getValue(); | 2845 unsigned Index = ElementIndex->getValue(); |
| 2961 Type Ty = SourceVectNotLegalized->getType(); | 2846 Type Ty = SourceVectNotLegalized->getType(); |
| 2962 Type ElementTy = typeElementType(Ty); | 2847 Type ElementTy = typeElementType(Ty); |
| 2963 Type InVectorElementTy = getInVectorElementType(Ty); | 2848 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 2964 Variable *ExtractedElementR = makeReg(InVectorElementTy); | 2849 Variable *ExtractedElementR = makeReg(InVectorElementTy); |
| 2965 | 2850 |
| 2966 // TODO(wala): Determine the best lowering sequences for each type. | 2851 // TODO(wala): Determine the best lowering sequences for each type. |
| 2967 bool CanUsePextr = | 2852 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2853 InstructionSet >= Machine::SSE4_1; |
| 2969 if (CanUsePextr && Ty != IceType_v4f32) { | 2854 if (CanUsePextr && Ty != IceType_v4f32) { |
| 2970 // Use pextrb, pextrw, or pextrd. | 2855 // Use pextrb, pextrw, or pextrd. |
| 2971 Constant *Mask = Ctx->getConstantInt32(Index); | 2856 Constant *Mask = Ctx->getConstantInt32(Index); |
| 2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2857 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
| 2973 _pextr(ExtractedElementR, SourceVectR, Mask); | 2858 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2859 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2975 // Use pshufd and movd/movss. | 2860 // Use pshufd and movd/movss. |
| 2976 Variable *T = nullptr; | 2861 Variable *T = nullptr; |
| 2977 if (Index) { | 2862 if (Index) { |
| 2978 // The shuffle only needs to occur if the element to be extracted | 2863 // The shuffle only needs to occur if the element to be extracted |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); | 2904 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); |
| 3020 lowerCast(Cast); | 2905 lowerCast(Cast); |
| 3021 ExtractedElementR = T; | 2906 ExtractedElementR = T; |
| 3022 } | 2907 } |
| 3023 | 2908 |
| 3024 // Copy the element to the destination. | 2909 // Copy the element to the destination. |
| 3025 Variable *Dest = Inst->getDest(); | 2910 Variable *Dest = Inst->getDest(); |
| 3026 _mov(Dest, ExtractedElementR); | 2911 _mov(Dest, ExtractedElementR); |
| 3027 } | 2912 } |
| 3028 | 2913 |
| 3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 2914 template <class Machine> |
| 2915 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { | |
| 3030 Operand *Src0 = Inst->getSrc(0); | 2916 Operand *Src0 = Inst->getSrc(0); |
| 3031 Operand *Src1 = Inst->getSrc(1); | 2917 Operand *Src1 = Inst->getSrc(1); |
| 3032 Variable *Dest = Inst->getDest(); | 2918 Variable *Dest = Inst->getDest(); |
| 3033 | 2919 |
| 3034 if (isVectorType(Dest->getType())) { | 2920 if (isVectorType(Dest->getType())) { |
| 3035 InstFcmp::FCond Condition = Inst->getCondition(); | 2921 InstFcmp::FCond Condition = Inst->getCondition(); |
| 3036 size_t Index = static_cast<size_t>(Condition); | 2922 size_t Index = static_cast<size_t>(Condition); |
| 3037 assert(Index < TableFcmpSize); | 2923 assert(Index < Traits::TableFcmpSize); |
| 3038 | 2924 |
| 3039 if (TableFcmp[Index].SwapVectorOperands) { | 2925 if (Traits::TableFcmp[Index].SwapVectorOperands) { |
| 3040 Operand *T = Src0; | 2926 Operand *T = Src0; |
| 3041 Src0 = Src1; | 2927 Src0 = Src1; |
| 3042 Src1 = T; | 2928 Src1 = T; |
| 3043 } | 2929 } |
| 3044 | 2930 |
| 3045 Variable *T = nullptr; | 2931 Variable *T = nullptr; |
| 3046 | 2932 |
| 3047 if (Condition == InstFcmp::True) { | 2933 if (Condition == InstFcmp::True) { |
| 3048 // makeVectorOfOnes() requires an integer vector type. | 2934 // makeVectorOfOnes() requires an integer vector type. |
| 3049 T = makeVectorOfMinusOnes(IceType_v4i32); | 2935 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 3050 } else if (Condition == InstFcmp::False) { | 2936 } else if (Condition == InstFcmp::False) { |
| 3051 T = makeVectorOfZeros(Dest->getType()); | 2937 T = makeVectorOfZeros(Dest->getType()); |
| 3052 } else { | 2938 } else { |
| 3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2939 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2940 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 3055 if (llvm::isa<OperandX8632Mem>(Src1RM)) | 2941 if (llvm::isa<OperandX8632Mem>(Src1RM)) |
| 3056 Src1RM = legalizeToVar(Src1RM); | 2942 Src1RM = legalizeToVar(Src1RM); |
| 3057 | 2943 |
| 3058 switch (Condition) { | 2944 switch (Condition) { |
| 3059 default: { | 2945 default: { |
| 3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2946 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; |
| 3061 assert(Predicate != CondX86::Cmpps_Invalid); | 2947 assert(Predicate != CondX86::Cmpps_Invalid); |
| 3062 T = makeReg(Src0RM->getType()); | 2948 T = makeReg(Src0RM->getType()); |
| 3063 _movp(T, Src0RM); | 2949 _movp(T, Src0RM); |
| 3064 _cmpps(T, Src1RM, Predicate); | 2950 _cmpps(T, Src1RM, Predicate); |
| 3065 } break; | 2951 } break; |
| 3066 case InstFcmp::One: { | 2952 case InstFcmp::One: { |
| 3067 // Check both unequal and ordered. | 2953 // Check both unequal and ordered. |
| 3068 T = makeReg(Src0RM->getType()); | 2954 T = makeReg(Src0RM->getType()); |
| 3069 Variable *T2 = makeReg(Src0RM->getType()); | 2955 Variable *T2 = makeReg(Src0RM->getType()); |
| 3070 _movp(T, Src0RM); | 2956 _movp(T, Src0RM); |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 3099 // j<C2> label /* only if C2 != Br_None */ | 2985 // j<C2> label /* only if C2 != Br_None */ |
| 3100 // FakeUse(a) /* only if C1 != Br_None */ | 2986 // FakeUse(a) /* only if C1 != Br_None */ |
| 3101 // mov a, !<default> /* only if C1 != Br_None */ | 2987 // mov a, !<default> /* only if C1 != Br_None */ |
| 3102 // label: /* only if C1 != Br_None */ | 2988 // label: /* only if C1 != Br_None */ |
| 3103 // | 2989 // |
| 3104 // setcc lowering when C1 != Br_None && C2 == Br_None: | 2990 // setcc lowering when C1 != Br_None && C2 == Br_None: |
| 3105 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ | 2991 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ |
| 3106 // setcc a, C1 | 2992 // setcc a, C1 |
| 3107 InstFcmp::FCond Condition = Inst->getCondition(); | 2993 InstFcmp::FCond Condition = Inst->getCondition(); |
| 3108 size_t Index = static_cast<size_t>(Condition); | 2994 size_t Index = static_cast<size_t>(Condition); |
| 3109 assert(Index < TableFcmpSize); | 2995 assert(Index < Traits::TableFcmpSize); |
| 3110 if (TableFcmp[Index].SwapScalarOperands) | 2996 if (Traits::TableFcmp[Index].SwapScalarOperands) |
| 3111 std::swap(Src0, Src1); | 2997 std::swap(Src0, Src1); |
| 3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); | 2998 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None); |
| 3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); | 2999 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None); |
| 3114 if (HasC1) { | 3000 if (HasC1) { |
| 3115 Src0 = legalize(Src0); | 3001 Src0 = legalize(Src0); |
| 3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 3002 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 3117 Variable *T = nullptr; | 3003 Variable *T = nullptr; |
| 3118 _mov(T, Src0); | 3004 _mov(T, Src0); |
| 3119 _ucomiss(T, Src1RM); | 3005 _ucomiss(T, Src1RM); |
| 3120 if (!HasC2) { | 3006 if (!HasC2) { |
| 3121 assert(TableFcmp[Index].Default); | 3007 assert(Traits::TableFcmp[Index].Default); |
| 3122 _setcc(Dest, TableFcmp[Index].C1); | 3008 _setcc(Dest, Traits::TableFcmp[Index].C1); |
| 3123 return; | 3009 return; |
| 3124 } | 3010 } |
| 3125 } | 3011 } |
| 3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); | 3012 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default); |
| 3127 _mov(Dest, Default); | 3013 _mov(Dest, Default); |
| 3128 if (HasC1) { | 3014 if (HasC1) { |
| 3129 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3015 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 3130 _br(TableFcmp[Index].C1, Label); | 3016 _br(Traits::TableFcmp[Index].C1, Label); |
| 3131 if (HasC2) { | 3017 if (HasC2) { |
| 3132 _br(TableFcmp[Index].C2, Label); | 3018 _br(Traits::TableFcmp[Index].C2, Label); |
| 3133 } | 3019 } |
| 3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); | 3020 Constant *NonDefault = |
| 3021 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default); | |
| 3135 _mov_nonkillable(Dest, NonDefault); | 3022 _mov_nonkillable(Dest, NonDefault); |
| 3136 Context.insert(Label); | 3023 Context.insert(Label); |
| 3137 } | 3024 } |
| 3138 } | 3025 } |
| 3139 | 3026 |
| 3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 3027 template <class Machine> |
| 3028 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { | |
| 3141 Operand *Src0 = legalize(Inst->getSrc(0)); | 3029 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 3142 Operand *Src1 = legalize(Inst->getSrc(1)); | 3030 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 3143 Variable *Dest = Inst->getDest(); | 3031 Variable *Dest = Inst->getDest(); |
| 3144 | 3032 |
| 3145 if (isVectorType(Dest->getType())) { | 3033 if (isVectorType(Dest->getType())) { |
| 3146 Type Ty = Src0->getType(); | 3034 Type Ty = Src0->getType(); |
| 3147 // Promote i1 vectors to 128 bit integer vector types. | 3035 // Promote i1 vectors to 128 bit integer vector types. |
| 3148 if (typeElementType(Ty) == IceType_i1) { | 3036 if (typeElementType(Ty) == IceType_i1) { |
| 3149 Type NewTy = IceType_NUM; | 3037 Type NewTy = IceType_NUM; |
| 3150 switch (Ty) { | 3038 switch (Ty) { |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3248 | 3136 |
| 3249 _movp(Dest, T); | 3137 _movp(Dest, T); |
| 3250 eliminateNextVectorSextInstruction(Dest); | 3138 eliminateNextVectorSextInstruction(Dest); |
| 3251 return; | 3139 return; |
| 3252 } | 3140 } |
| 3253 | 3141 |
| 3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 3142 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| 3255 if (Src0->getType() == IceType_i64) { | 3143 if (Src0->getType() == IceType_i64) { |
| 3256 InstIcmp::ICond Condition = Inst->getCondition(); | 3144 InstIcmp::ICond Condition = Inst->getCondition(); |
| 3257 size_t Index = static_cast<size_t>(Condition); | 3145 size_t Index = static_cast<size_t>(Condition); |
| 3258 assert(Index < TableIcmp64Size); | 3146 assert(Index < Traits::TableIcmp64Size); |
| 3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | 3147 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
| 3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | 3148 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
| 3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 3149 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 3150 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 3263 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3151 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3264 Constant *One = Ctx->getConstantInt32(1); | 3152 Constant *One = Ctx->getConstantInt32(1); |
| 3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | 3153 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
| 3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | 3154 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
| 3267 _mov(Dest, One); | 3155 _mov(Dest, One); |
| 3268 _cmp(Src0HiRM, Src1HiRI); | 3156 _cmp(Src0HiRM, Src1HiRI); |
| 3269 if (TableIcmp64[Index].C1 != CondX86::Br_None) | 3157 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None) |
| 3270 _br(TableIcmp64[Index].C1, LabelTrue); | 3158 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 3271 if (TableIcmp64[Index].C2 != CondX86::Br_None) | 3159 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None) |
| 3272 _br(TableIcmp64[Index].C2, LabelFalse); | 3160 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 3273 _cmp(Src0LoRM, Src1LoRI); | 3161 _cmp(Src0LoRM, Src1LoRI); |
| 3274 _br(TableIcmp64[Index].C3, LabelTrue); | 3162 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 3275 Context.insert(LabelFalse); | 3163 Context.insert(LabelFalse); |
| 3276 _mov_nonkillable(Dest, Zero); | 3164 _mov_nonkillable(Dest, Zero); |
| 3277 Context.insert(LabelTrue); | 3165 Context.insert(LabelTrue); |
| 3278 return; | 3166 return; |
| 3279 } | 3167 } |
| 3280 | 3168 |
| 3281 // cmp b, c | 3169 // cmp b, c |
| 3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 3170 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 3283 _cmp(Src0RM, Src1); | 3171 _cmp(Src0RM, Src1); |
| 3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition())); | 3172 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); |
| 3285 } | 3173 } |
| 3286 | 3174 |
| 3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 3175 template <class Machine> |
| 3176 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | |
| 3288 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3177 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3178 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 3290 ConstantInteger32 *ElementIndex = | 3179 ConstantInteger32 *ElementIndex = |
| 3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3180 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 3292 // Only constant indices are allowed in PNaCl IR. | 3181 // Only constant indices are allowed in PNaCl IR. |
| 3293 assert(ElementIndex); | 3182 assert(ElementIndex); |
| 3294 unsigned Index = ElementIndex->getValue(); | 3183 unsigned Index = ElementIndex->getValue(); |
| 3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 3184 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
| 3296 | 3185 |
| 3297 Type Ty = SourceVectNotLegalized->getType(); | 3186 Type Ty = SourceVectNotLegalized->getType(); |
| 3298 Type ElementTy = typeElementType(Ty); | 3187 Type ElementTy = typeElementType(Ty); |
| 3299 Type InVectorElementTy = getInVectorElementType(Ty); | 3188 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 3300 | 3189 |
| 3301 if (ElementTy == IceType_i1) { | 3190 if (ElementTy == IceType_i1) { |
| 3302 // Expand the element to the appropriate size for it to be inserted | 3191 // Expand the element to the appropriate size for it to be inserted |
| 3303 // in the vector. | 3192 // in the vector. |
| 3304 Variable *Expanded = Func->makeVariable(InVectorElementTy); | 3193 Variable *Expanded = Func->makeVariable(InVectorElementTy); |
| 3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, | 3194 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, |
| 3306 ElementToInsertNotLegalized); | 3195 ElementToInsertNotLegalized); |
| 3307 lowerCast(Cast); | 3196 lowerCast(Cast); |
| 3308 ElementToInsertNotLegalized = Expanded; | 3197 ElementToInsertNotLegalized = Expanded; |
| 3309 } | 3198 } |
| 3310 | 3199 |
| 3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { | 3200 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 3201 InstructionSet >= Machine::SSE4_1) { | |
| 3312 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3202 // Use insertps, pinsrb, pinsrw, or pinsrd. |
| 3313 Operand *ElementRM = | 3203 Operand *ElementRM = |
| 3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3204 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 3315 Operand *SourceVectRM = | 3205 Operand *SourceVectRM = |
| 3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3206 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3317 Variable *T = makeReg(Ty); | 3207 Variable *T = makeReg(Ty); |
| 3318 _movp(T, SourceVectRM); | 3208 _movp(T, SourceVectRM); |
| 3319 if (Ty == IceType_v4f32) | 3209 if (Ty == IceType_v4f32) |
| 3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3210 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 3321 else | 3211 else |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3400 OperandX8632Mem *Loc = | 3290 OperandX8632Mem *Loc = |
| 3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3291 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | 3292 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); |
| 3403 | 3293 |
| 3404 Variable *T = makeReg(Ty); | 3294 Variable *T = makeReg(Ty); |
| 3405 _movp(T, Slot); | 3295 _movp(T, Slot); |
| 3406 _movp(Inst->getDest(), T); | 3296 _movp(Inst->getDest(), T); |
| 3407 } | 3297 } |
| 3408 } | 3298 } |
| 3409 | 3299 |
| 3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 3300 template <class Machine> |
| 3301 void TargetX86Base<Machine>::lowerIntrinsicCall( | |
| 3302 const InstIntrinsicCall *Instr) { | |
| 3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3303 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
| 3412 case Intrinsics::AtomicCmpxchg: { | 3304 case Intrinsics::AtomicCmpxchg: { |
| 3413 if (!Intrinsics::isMemoryOrderValid( | 3305 if (!Intrinsics::isMemoryOrderValid( |
| 3414 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3306 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 3415 getConstantMemoryOrder(Instr->getArg(4)))) { | 3307 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3308 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 3417 return; | 3309 return; |
| 3418 } | 3310 } |
| 3419 Variable *DestPrev = Instr->getDest(); | 3311 Variable *DestPrev = Instr->getDest(); |
| 3420 Operand *PtrToMem = Instr->getArg(0); | 3312 Operand *PtrToMem = Instr->getArg(0); |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3503 Context.insert( | 3395 Context.insert( |
| 3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3396 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 3505 return; | 3397 return; |
| 3506 } | 3398 } |
| 3507 case Intrinsics::AtomicRMW: | 3399 case Intrinsics::AtomicRMW: |
| 3508 if (!Intrinsics::isMemoryOrderValid( | 3400 if (!Intrinsics::isMemoryOrderValid( |
| 3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3401 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3510 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3402 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3511 return; | 3403 return; |
| 3512 } | 3404 } |
| 3513 lowerAtomicRMW( | 3405 lowerAtomicRMW(Instr->getDest(), |
| 3514 Instr->getDest(), | 3406 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( |
| 3515 static_cast<uint32_t>( | 3407 Instr->getArg(0))->getValue()), |
| 3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | 3408 Instr->getArg(1), Instr->getArg(2)); |
| 3517 Instr->getArg(1), Instr->getArg(2)); | |
| 3518 return; | 3409 return; |
| 3519 case Intrinsics::AtomicStore: { | 3410 case Intrinsics::AtomicStore: { |
| 3520 if (!Intrinsics::isMemoryOrderValid( | 3411 if (!Intrinsics::isMemoryOrderValid( |
| 3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3412 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 3522 Func->setError("Unexpected memory ordering for AtomicStore"); | 3413 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 3523 return; | 3414 return; |
| 3524 } | 3415 } |
| 3525 // We require the memory address to be naturally aligned. | 3416 // We require the memory address to be naturally aligned. |
| 3526 // Given that is the case, then normal stores are atomic. | 3417 // Given that is the case, then normal stores are atomic. |
| 3527 // Add a fence after the store to make it visible. | 3418 // Add a fence after the store to make it visible. |
| (...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3733 case Intrinsics::Trap: | 3624 case Intrinsics::Trap: |
| 3734 _ud2(); | 3625 _ud2(); |
| 3735 return; | 3626 return; |
| 3736 case Intrinsics::UnknownIntrinsic: | 3627 case Intrinsics::UnknownIntrinsic: |
| 3737 Func->setError("Should not be lowering UnknownIntrinsic"); | 3628 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3738 return; | 3629 return; |
| 3739 } | 3630 } |
| 3740 return; | 3631 return; |
| 3741 } | 3632 } |
| 3742 | 3633 |
| 3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, | 3634 template <class Machine> |
| 3744 Operand *Expected, Operand *Desired) { | 3635 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3636 Operand *Ptr, Operand *Expected, | |
| 3637 Operand *Desired) { | |
| 3745 if (Expected->getType() == IceType_i64) { | 3638 if (Expected->getType() == IceType_i64) { |
| 3746 // Reserve the pre-colored registers first, before adding any more | 3639 // Reserve the pre-colored registers first, before adding any more |
| 3747 // infinite-weight variables from formMemoryOperand's legalization. | 3640 // infinite-weight variables from formMemoryOperand's legalization. |
| 3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 3641 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
| 3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | 3642 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); |
| 3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 3643 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| 3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | 3644 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); |
| 3752 _mov(T_eax, loOperand(Expected)); | 3645 _mov(T_eax, loOperand(Expected)); |
| 3753 _mov(T_edx, hiOperand(Expected)); | 3646 _mov(T_edx, hiOperand(Expected)); |
| 3754 _mov(T_ebx, loOperand(Desired)); | 3647 _mov(T_ebx, loOperand(Desired)); |
| 3755 _mov(T_ecx, hiOperand(Desired)); | 3648 _mov(T_ecx, hiOperand(Desired)); |
| 3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3649 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3757 const bool Locked = true; | 3650 const bool Locked = true; |
| 3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3651 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3652 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3653 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3761 _mov(DestLo, T_eax); | 3654 _mov(DestLo, T_eax); |
| 3762 _mov(DestHi, T_edx); | 3655 _mov(DestHi, T_edx); |
| 3763 return; | 3656 return; |
| 3764 } | 3657 } |
| 3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); | 3658 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); |
| 3766 _mov(T_eax, Expected); | 3659 _mov(T_eax, Expected); |
| 3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3660 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3768 Variable *DesiredReg = legalizeToVar(Desired); | 3661 Variable *DesiredReg = legalizeToVar(Desired); |
| 3769 const bool Locked = true; | 3662 const bool Locked = true; |
| 3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3663 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3771 _mov(DestPrev, T_eax); | 3664 _mov(DestPrev, T_eax); |
| 3772 } | 3665 } |
| 3773 | 3666 |
| 3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | 3667 template <class Machine> |
| 3775 Operand *Expected, | 3668 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| 3776 Operand *Desired) { | 3669 Operand *PtrToMem, |
| 3670 Operand *Expected, | |
| 3671 Operand *Desired) { | |
| 3777 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3672 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 3778 return false; | 3673 return false; |
| 3779 // Peek ahead a few instructions and see how Dest is used. | 3674 // Peek ahead a few instructions and see how Dest is used. |
| 3780 // It's very common to have: | 3675 // It's very common to have: |
| 3781 // | 3676 // |
| 3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3677 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
| 3783 // [%y_phi = ...] // list of phi stores | 3678 // [%y_phi = ...] // list of phi stores |
| 3784 // %p = icmp eq i32 %x, %expected | 3679 // %p = icmp eq i32 %x, %expected |
| 3785 // br i1 %p, label %l1, label %l2 | 3680 // br i1 %p, label %l1, label %l2 |
| 3786 // | 3681 // |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3837 NextBr->setDeleted(); | 3732 NextBr->setDeleted(); |
| 3838 Context.advanceNext(); | 3733 Context.advanceNext(); |
| 3839 Context.advanceNext(); | 3734 Context.advanceNext(); |
| 3840 return true; | 3735 return true; |
| 3841 } | 3736 } |
| 3842 } | 3737 } |
| 3843 } | 3738 } |
| 3844 return false; | 3739 return false; |
| 3845 } | 3740 } |
| 3846 | 3741 |
| 3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3742 template <class Machine> |
| 3848 Operand *Ptr, Operand *Val) { | 3743 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 3744 Operand *Ptr, Operand *Val) { | |
| 3849 bool NeedsCmpxchg = false; | 3745 bool NeedsCmpxchg = false; |
| 3850 LowerBinOp Op_Lo = nullptr; | 3746 LowerBinOp Op_Lo = nullptr; |
| 3851 LowerBinOp Op_Hi = nullptr; | 3747 LowerBinOp Op_Hi = nullptr; |
| 3852 switch (Operation) { | 3748 switch (Operation) { |
| 3853 default: | 3749 default: |
| 3854 Func->setError("Unknown AtomicRMW operation"); | 3750 Func->setError("Unknown AtomicRMW operation"); |
| 3855 return; | 3751 return; |
| 3856 case Intrinsics::AtomicAdd: { | 3752 case Intrinsics::AtomicAdd: { |
| 3857 if (Dest->getType() == IceType_i64) { | 3753 if (Dest->getType() == IceType_i64) { |
| 3858 // All the fall-through paths must set this to true, but use this | 3754 // All the fall-through paths must set this to true, but use this |
| 3859 // for asserting. | 3755 // for asserting. |
| 3860 NeedsCmpxchg = true; | 3756 NeedsCmpxchg = true; |
| 3861 Op_Lo = &TargetX8632::_add; | 3757 Op_Lo = &TargetX86Base<Machine>::_add; |
| 3862 Op_Hi = &TargetX8632::_adc; | 3758 Op_Hi = &TargetX86Base<Machine>::_adc; |
| 3863 break; | 3759 break; |
| 3864 } | 3760 } |
| 3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3761 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3866 const bool Locked = true; | 3762 const bool Locked = true; |
| 3867 Variable *T = nullptr; | 3763 Variable *T = nullptr; |
| 3868 _mov(T, Val); | 3764 _mov(T, Val); |
| 3869 _xadd(Addr, T, Locked); | 3765 _xadd(Addr, T, Locked); |
| 3870 _mov(Dest, T); | 3766 _mov(Dest, T); |
| 3871 return; | 3767 return; |
| 3872 } | 3768 } |
| 3873 case Intrinsics::AtomicSub: { | 3769 case Intrinsics::AtomicSub: { |
| 3874 if (Dest->getType() == IceType_i64) { | 3770 if (Dest->getType() == IceType_i64) { |
| 3875 NeedsCmpxchg = true; | 3771 NeedsCmpxchg = true; |
| 3876 Op_Lo = &TargetX8632::_sub; | 3772 Op_Lo = &TargetX86Base<Machine>::_sub; |
| 3877 Op_Hi = &TargetX8632::_sbb; | 3773 Op_Hi = &TargetX86Base<Machine>::_sbb; |
| 3878 break; | 3774 break; |
| 3879 } | 3775 } |
| 3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3776 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3881 const bool Locked = true; | 3777 const bool Locked = true; |
| 3882 Variable *T = nullptr; | 3778 Variable *T = nullptr; |
| 3883 _mov(T, Val); | 3779 _mov(T, Val); |
| 3884 _neg(T); | 3780 _neg(T); |
| 3885 _xadd(Addr, T, Locked); | 3781 _xadd(Addr, T, Locked); |
| 3886 _mov(Dest, T); | 3782 _mov(Dest, T); |
| 3887 return; | 3783 return; |
| 3888 } | 3784 } |
| 3889 case Intrinsics::AtomicOr: | 3785 case Intrinsics::AtomicOr: |
| 3890 // TODO(jvoung): If Dest is null or dead, then some of these | 3786 // TODO(jvoung): If Dest is null or dead, then some of these |
| 3891 // operations do not need an "exchange", but just a locked op. | 3787 // operations do not need an "exchange", but just a locked op. |
| 3892 // That appears to be "worth" it for sub, or, and, and xor. | 3788 // That appears to be "worth" it for sub, or, and, and xor. |
| 3893 // xadd is probably fine vs lock add for add, and xchg is fine | 3789 // xadd is probably fine vs lock add for add, and xchg is fine |
| 3894 // vs an atomic store. | 3790 // vs an atomic store. |
| 3895 NeedsCmpxchg = true; | 3791 NeedsCmpxchg = true; |
| 3896 Op_Lo = &TargetX8632::_or; | 3792 Op_Lo = &TargetX86Base<Machine>::_or; |
| 3897 Op_Hi = &TargetX8632::_or; | 3793 Op_Hi = &TargetX86Base<Machine>::_or; |
| 3898 break; | 3794 break; |
| 3899 case Intrinsics::AtomicAnd: | 3795 case Intrinsics::AtomicAnd: |
| 3900 NeedsCmpxchg = true; | 3796 NeedsCmpxchg = true; |
| 3901 Op_Lo = &TargetX8632::_and; | 3797 Op_Lo = &TargetX86Base<Machine>::_and; |
| 3902 Op_Hi = &TargetX8632::_and; | 3798 Op_Hi = &TargetX86Base<Machine>::_and; |
| 3903 break; | 3799 break; |
| 3904 case Intrinsics::AtomicXor: | 3800 case Intrinsics::AtomicXor: |
| 3905 NeedsCmpxchg = true; | 3801 NeedsCmpxchg = true; |
| 3906 Op_Lo = &TargetX8632::_xor; | 3802 Op_Lo = &TargetX86Base<Machine>::_xor; |
| 3907 Op_Hi = &TargetX8632::_xor; | 3803 Op_Hi = &TargetX86Base<Machine>::_xor; |
| 3908 break; | 3804 break; |
| 3909 case Intrinsics::AtomicExchange: | 3805 case Intrinsics::AtomicExchange: |
| 3910 if (Dest->getType() == IceType_i64) { | 3806 if (Dest->getType() == IceType_i64) { |
| 3911 NeedsCmpxchg = true; | 3807 NeedsCmpxchg = true; |
| 3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3808 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 3913 // just need to be moved to the ecx and ebx registers. | 3809 // just need to be moved to the ecx and ebx registers. |
| 3914 Op_Lo = nullptr; | 3810 Op_Lo = nullptr; |
| 3915 Op_Hi = nullptr; | 3811 Op_Hi = nullptr; |
| 3916 break; | 3812 break; |
| 3917 } | 3813 } |
| 3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | 3814 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); |
| 3919 Variable *T = nullptr; | 3815 Variable *T = nullptr; |
| 3920 _mov(T, Val); | 3816 _mov(T, Val); |
| 3921 _xchg(Addr, T); | 3817 _xchg(Addr, T); |
| 3922 _mov(Dest, T); | 3818 _mov(Dest, T); |
| 3923 return; | 3819 return; |
| 3924 } | 3820 } |
| 3925 // Otherwise, we need a cmpxchg loop. | 3821 // Otherwise, we need a cmpxchg loop. |
| 3926 (void)NeedsCmpxchg; | 3822 (void)NeedsCmpxchg; |
| 3927 assert(NeedsCmpxchg); | 3823 assert(NeedsCmpxchg); |
| 3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | 3824 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
| 3929 } | 3825 } |
| 3930 | 3826 |
| 3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, | 3827 template <class Machine> |
| 3932 Variable *Dest, Operand *Ptr, | 3828 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, |
| 3933 Operand *Val) { | 3829 LowerBinOp Op_Hi, |
| 3830 Variable *Dest, | |
| 3831 Operand *Ptr, | |
| 3832 Operand *Val) { | |
| 3934 // Expand a more complex RMW operation as a cmpxchg loop: | 3833 // Expand a more complex RMW operation as a cmpxchg loop: |
| 3935 // For 64-bit: | 3834 // For 64-bit: |
| 3936 // mov eax, [ptr] | 3835 // mov eax, [ptr] |
| 3937 // mov edx, [ptr + 4] | 3836 // mov edx, [ptr + 4] |
| 3938 // .LABEL: | 3837 // .LABEL: |
| 3939 // mov ebx, eax | 3838 // mov ebx, eax |
| 3940 // <Op_Lo> ebx, <desired_adj_lo> | 3839 // <Op_Lo> ebx, <desired_adj_lo> |
| 3941 // mov ecx, edx | 3840 // mov ecx, edx |
| 3942 // <Op_Hi> ecx, <desired_adj_hi> | 3841 // <Op_Hi> ecx, <desired_adj_hi> |
| 3943 // lock cmpxchg8b [ptr] | 3842 // lock cmpxchg8b [ptr] |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4028 // The address base (if any) is also reused in the loop. | 3927 // The address base (if any) is also reused in the loop. |
| 4029 if (Variable *Base = Addr->getBase()) | 3928 if (Variable *Base = Addr->getBase()) |
| 4030 Context.insert(InstFakeUse::create(Func, Base)); | 3929 Context.insert(InstFakeUse::create(Func, Base)); |
| 4031 _mov(Dest, T_eax); | 3930 _mov(Dest, T_eax); |
| 4032 } | 3931 } |
| 4033 | 3932 |
| 4034 // Lowers count {trailing, leading} zeros intrinsic. | 3933 // Lowers count {trailing, leading} zeros intrinsic. |
| 4035 // | 3934 // |
| 4036 // We could do constant folding here, but that should have | 3935 // We could do constant folding here, but that should have |
| 4037 // been done by the front-end/middle-end optimizations. | 3936 // been done by the front-end/middle-end optimizations. |
| 4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3937 template <class Machine> |
| 4039 Operand *FirstVal, Operand *SecondVal) { | 3938 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3939 Operand *FirstVal, | |
| 3940 Operand *SecondVal) { | |
| 4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3941 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 4041 // Then the instructions will handle the Val == 0 case much more simply | 3942 // Then the instructions will handle the Val == 0 case much more simply |
| 4042 // and won't require conversion from bit position to number of zeros. | 3943 // and won't require conversion from bit position to number of zeros. |
| 4043 // | 3944 // |
| 4044 // Otherwise: | 3945 // Otherwise: |
| 4045 // bsr IF_NOT_ZERO, Val | 3946 // bsr IF_NOT_ZERO, Val |
| 4046 // mov T_DEST, 63 | 3947 // mov T_DEST, 63 |
| 4047 // cmovne T_DEST, IF_NOT_ZERO | 3948 // cmovne T_DEST, IF_NOT_ZERO |
| 4048 // xor T_DEST, 31 | 3949 // xor T_DEST, 31 |
| 4049 // mov DEST, T_DEST | 3950 // mov DEST, T_DEST |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4100 } else { | 4001 } else { |
| 4101 _bsr(T_Dest2, SecondVar); | 4002 _bsr(T_Dest2, SecondVar); |
| 4102 _xor(T_Dest2, ThirtyOne); | 4003 _xor(T_Dest2, ThirtyOne); |
| 4103 } | 4004 } |
| 4104 _test(SecondVar, SecondVar); | 4005 _test(SecondVar, SecondVar); |
| 4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e); | 4006 _cmov(T_Dest2, T_Dest, CondX86::Br_e); |
| 4106 _mov(DestLo, T_Dest2); | 4007 _mov(DestLo, T_Dest2); |
| 4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 4008 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 4108 } | 4009 } |
| 4109 | 4010 |
| 4110 namespace { | |
| 4111 | |
| 4112 bool isAdd(const Inst *Inst) { | 4011 bool isAdd(const Inst *Inst) { |
| 4113 if (const InstArithmetic *Arith = | 4012 if (const InstArithmetic *Arith = |
| 4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 4013 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 4115 return (Arith->getOp() == InstArithmetic::Add); | 4014 return (Arith->getOp() == InstArithmetic::Add); |
| 4116 } | 4015 } |
| 4117 return false; | 4016 return false; |
| 4118 } | 4017 } |
| 4119 | 4018 |
| 4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 4019 void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 4121 const Variable *Index, uint16_t Shift, int32_t Offset, | 4020 const Variable *Index, uint16_t Shift, int32_t Offset, |
| (...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4342 // set Index=Var, Offset+=(Const<<Shift) | 4241 // set Index=Var, Offset+=(Const<<Shift) |
| 4343 | 4242 |
| 4344 // Index is Index=Var-Const ==> | 4243 // Index is Index=Var-Const ==> |
| 4345 // set Index=Var, Offset-=(Const<<Shift) | 4244 // set Index=Var, Offset-=(Const<<Shift) |
| 4346 | 4245 |
| 4347 // TODO: consider overflow issues with respect to Offset. | 4246 // TODO: consider overflow issues with respect to Offset. |
| 4348 // TODO: handle symbolic constants. | 4247 // TODO: handle symbolic constants. |
| 4349 } | 4248 } |
| 4350 } | 4249 } |
| 4351 | 4250 |
| 4352 } // anonymous namespace | 4251 template <class Machine> |
| 4353 | 4252 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { |
| 4354 void TargetX8632::lowerLoad(const InstLoad *Load) { | |
| 4355 // A Load instruction can be treated the same as an Assign | 4253 // A Load instruction can be treated the same as an Assign |
| 4356 // instruction, after the source operand is transformed into an | 4254 // instruction, after the source operand is transformed into an |
| 4357 // OperandX8632Mem operand. Note that the address mode | 4255 // OperandX8632Mem operand. Note that the address mode |
| 4358 // optimization already creates an OperandX8632Mem operand, so it | 4256 // optimization already creates an OperandX8632Mem operand, so it |
| 4359 // doesn't need another level of transformation. | 4257 // doesn't need another level of transformation. |
| 4360 Variable *DestLoad = Load->getDest(); | 4258 Variable *DestLoad = Load->getDest(); |
| 4361 Type Ty = DestLoad->getType(); | 4259 Type Ty = DestLoad->getType(); |
| 4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 4260 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 4261 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 4364 lowerAssign(Assign); | 4262 lowerAssign(Assign); |
| 4365 } | 4263 } |
| 4366 | 4264 |
| 4367 void TargetX8632::doAddressOptLoad() { | 4265 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| 4368 Inst *Inst = Context.getCur(); | 4266 Inst *Inst = Context.getCur(); |
| 4369 Variable *Dest = Inst->getDest(); | 4267 Variable *Dest = Inst->getDest(); |
| 4370 Operand *Addr = Inst->getSrc(0); | 4268 Operand *Addr = Inst->getSrc(0); |
| 4371 Variable *Index = nullptr; | 4269 Variable *Index = nullptr; |
| 4372 uint16_t Shift = 0; | 4270 uint16_t Shift = 0; |
| 4373 int32_t Offset = 0; // TODO: make Constant | 4271 int32_t Offset = 0; // TODO: make Constant |
| 4374 // Vanilla ICE load instructions should not use the segment registers, | 4272 // Vanilla ICE load instructions should not use the segment registers, |
| 4375 // and computeAddressOpt only works at the level of Variables and Constants, | 4273 // and computeAddressOpt only works at the level of Variables and Constants, |
| 4376 // not other OperandX8632Mem, so there should be no mention of segment | 4274 // not other OperandX8632Mem, so there should be no mention of segment |
| 4377 // registers there either. | 4275 // registers there either. |
| 4378 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4276 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4379 OperandX8632Mem::DefaultSegment; | 4277 OperandX8632Mem::DefaultSegment; |
| 4380 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4278 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4279 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4382 if (Base && Addr != Base) { | 4280 if (Base && Addr != Base) { |
| 4383 Inst->setDeleted(); | 4281 Inst->setDeleted(); |
| 4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4282 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, | 4283 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
| 4386 Shift, SegmentReg); | 4284 Shift, SegmentReg); |
| 4387 Context.insert(InstLoad::create(Func, Dest, Addr)); | 4285 Context.insert(InstLoad::create(Func, Dest, Addr)); |
| 4388 } | 4286 } |
| 4389 } | 4287 } |
| 4390 | 4288 |
| 4391 void TargetX8632::randomlyInsertNop(float Probability) { | 4289 template <class Machine> |
| 4290 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) { | |
| 4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | 4291 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); |
| 4393 if (RNG.getTrueWithProbability(Probability)) { | 4292 if (RNG.getTrueWithProbability(Probability)) { |
| 4394 _nop(RNG(X86_NUM_NOP_VARIANTS)); | 4293 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); |
| 4395 } | 4294 } |
| 4396 } | 4295 } |
| 4397 | 4296 |
| 4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | 4297 template <class Machine> |
| 4298 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | |
| 4399 Func->setError("Phi found in regular instruction list"); | 4299 Func->setError("Phi found in regular instruction list"); |
| 4400 } | 4300 } |
| 4401 | 4301 |
| 4402 void TargetX8632::lowerRet(const InstRet *Inst) { | 4302 template <class Machine> |
| 4303 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) { | |
| 4403 Variable *Reg = nullptr; | 4304 Variable *Reg = nullptr; |
| 4404 if (Inst->hasRetValue()) { | 4305 if (Inst->hasRetValue()) { |
| 4405 Operand *Src0 = legalize(Inst->getRetValue()); | 4306 Operand *Src0 = legalize(Inst->getRetValue()); |
| 4406 if (Src0->getType() == IceType_i64) { | 4307 if (Src0->getType() == IceType_i64) { |
| 4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); | 4308 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); |
| 4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); | 4309 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); |
| 4409 Reg = eax; | 4310 Reg = eax; |
| 4410 Context.insert(InstFakeUse::create(Func, edx)); | 4311 Context.insert(InstFakeUse::create(Func, edx)); |
| 4411 } else if (isScalarFloatingType(Src0->getType())) { | 4312 } else if (isScalarFloatingType(Src0->getType())) { |
| 4412 _fld(Src0); | 4313 _fld(Src0); |
| 4413 } else if (isVectorType(Src0->getType())) { | 4314 } else if (isVectorType(Src0->getType())) { |
| 4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); | 4315 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); |
| 4415 } else { | 4316 } else { |
| 4416 _mov(Reg, Src0, RegX8632::Reg_eax); | 4317 _mov(Reg, Src0, RegX8632::Reg_eax); |
| 4417 } | 4318 } |
| 4418 } | 4319 } |
| 4419 // Add a ret instruction even if sandboxing is enabled, because | 4320 // Add a ret instruction even if sandboxing is enabled, because |
| 4420 // addEpilog explicitly looks for a ret instruction as a marker for | 4321 // addEpilog explicitly looks for a ret instruction as a marker for |
| 4421 // where to insert the frame removal instructions. | 4322 // where to insert the frame removal instructions. |
| 4422 _ret(Reg); | 4323 _ret(Reg); |
| 4423 // Add a fake use of esp to make sure esp stays alive for the entire | 4324 // Add a fake use of esp to make sure esp stays alive for the entire |
| 4424 // function. Otherwise post-call esp adjustments get dead-code | 4325 // function. Otherwise post-call esp adjustments get dead-code |
| 4425 // eliminated. TODO: Are there more places where the fake use | 4326 // eliminated. TODO: Are there more places where the fake use |
| 4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 4327 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| 4427 // have a ret instruction. | 4328 // have a ret instruction. |
| 4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 4329 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
| 4429 Context.insert(InstFakeUse::create(Func, esp)); | 4330 Context.insert(InstFakeUse::create(Func, esp)); |
| 4430 } | 4331 } |
| 4431 | 4332 |
| 4432 void TargetX8632::lowerSelect(const InstSelect *Inst) { | 4333 template <class Machine> |
| 4334 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { | |
| 4433 Variable *Dest = Inst->getDest(); | 4335 Variable *Dest = Inst->getDest(); |
| 4434 Type DestTy = Dest->getType(); | 4336 Type DestTy = Dest->getType(); |
| 4435 Operand *SrcT = Inst->getTrueOperand(); | 4337 Operand *SrcT = Inst->getTrueOperand(); |
| 4436 Operand *SrcF = Inst->getFalseOperand(); | 4338 Operand *SrcF = Inst->getFalseOperand(); |
| 4437 Operand *Condition = Inst->getCondition(); | 4339 Operand *Condition = Inst->getCondition(); |
| 4438 | 4340 |
| 4439 if (isVectorType(DestTy)) { | 4341 if (isVectorType(DestTy)) { |
| 4440 Type SrcTy = SrcT->getType(); | 4342 Type SrcTy = SrcT->getType(); |
| 4441 Variable *T = makeReg(SrcTy); | 4343 Variable *T = makeReg(SrcTy); |
| 4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 4344 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 4345 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 4444 if (InstructionSet >= SSE4_1) { | 4346 if (InstructionSet >= Machine::SSE4_1) { |
| 4445 // TODO(wala): If the condition operand is a constant, use blendps | 4347 // TODO(wala): If the condition operand is a constant, use blendps |
| 4446 // or pblendw. | 4348 // or pblendw. |
| 4447 // | 4349 // |
| 4448 // Use blendvps or pblendvb to implement select. | 4350 // Use blendvps or pblendvb to implement select. |
| 4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 4351 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 4450 SrcTy == IceType_v4f32) { | 4352 SrcTy == IceType_v4f32) { |
| 4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4353 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 4354 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
| 4453 _movp(xmm0, ConditionRM); | 4355 _movp(xmm0, ConditionRM); |
| 4454 _psll(xmm0, Ctx->getConstantInt8(31)); | 4356 _psll(xmm0, Ctx->getConstantInt8(31)); |
| 4455 _movp(T, SrcFRM); | 4357 _movp(T, SrcFRM); |
| 4456 _blendvps(T, SrcTRM, xmm0); | 4358 _blendvps(T, SrcTRM, xmm0); |
| 4457 _movp(Dest, T); | 4359 _movp(Dest, T); |
| 4458 } else { | 4360 } else { |
| 4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 4361 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 4362 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| 4461 : IceType_v16i8; | 4363 : IceType_v16i8; |
| 4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 4364 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
| 4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 4365 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 4464 _movp(T, SrcFRM); | 4366 _movp(T, SrcFRM); |
| 4465 _pblendvb(T, SrcTRM, xmm0); | 4367 _pblendvb(T, SrcTRM, xmm0); |
| 4466 _movp(Dest, T); | 4368 _movp(Dest, T); |
| 4467 } | 4369 } |
| 4468 return; | 4370 return; |
| 4469 } | 4371 } |
| 4470 // Lower select without SSE4.1: | 4372 // Lower select without Machine::SSE4.1: |
| 4471 // a=d?b:c ==> | 4373 // a=d?b:c ==> |
| 4472 // if elementtype(d) != i1: | 4374 // if elementtype(d) != i1: |
| 4473 // d=sext(d); | 4375 // d=sext(d); |
| 4474 // a=(b&d)|(c&~d); | 4376 // a=(b&d)|(c&~d); |
| 4475 Variable *T2 = makeReg(SrcTy); | 4377 Variable *T2 = makeReg(SrcTy); |
| 4476 // Sign extend the condition operand if applicable. | 4378 // Sign extend the condition operand if applicable. |
| 4477 if (SrcTy == IceType_v4f32) { | 4379 if (SrcTy == IceType_v4f32) { |
| 4478 // The sext operation takes only integer arguments. | 4380 // The sext operation takes only integer arguments. |
| 4479 Variable *T3 = Func->makeVariable(IceType_v4i32); | 4381 Variable *T3 = Func->makeVariable(IceType_v4i32); |
| 4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | 4382 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 4498 Operand *CmpOpnd0 = nullptr; | 4400 Operand *CmpOpnd0 = nullptr; |
| 4499 Operand *CmpOpnd1 = nullptr; | 4401 Operand *CmpOpnd1 = nullptr; |
| 4500 // Handle folding opportunities. | 4402 // Handle folding opportunities. |
| 4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4403 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| 4502 assert(Producer->isDeleted()); | 4404 assert(Producer->isDeleted()); |
| 4503 switch (BoolFolding::getProducerKind(Producer)) { | 4405 switch (BoolFolding::getProducerKind(Producer)) { |
| 4504 default: | 4406 default: |
| 4505 break; | 4407 break; |
| 4506 case BoolFolding::PK_Icmp32: { | 4408 case BoolFolding::PK_Icmp32: { |
| 4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 4409 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| 4508 Cond = getIcmp32Mapping(Cmp->getCondition()); | 4410 Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); |
| 4509 CmpOpnd1 = legalize(Producer->getSrc(1)); | 4411 CmpOpnd1 = legalize(Producer->getSrc(1)); |
| 4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); | 4412 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); |
| 4511 } break; | 4413 } break; |
| 4512 } | 4414 } |
| 4513 } | 4415 } |
| 4514 if (CmpOpnd0 == nullptr) { | 4416 if (CmpOpnd0 == nullptr) { |
| 4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | 4417 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 4418 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
| 4517 } | 4419 } |
| 4518 assert(CmpOpnd0); | 4420 assert(CmpOpnd0); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4562 | 4464 |
| 4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32); | 4465 assert(DestTy == IceType_i16 || DestTy == IceType_i32); |
| 4564 Variable *T = nullptr; | 4466 Variable *T = nullptr; |
| 4565 SrcF = legalize(SrcF); | 4467 SrcF = legalize(SrcF); |
| 4566 _mov(T, SrcF); | 4468 _mov(T, SrcF); |
| 4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4469 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4568 _cmov(T, SrcT, Cond); | 4470 _cmov(T, SrcT, Cond); |
| 4569 _mov(Dest, T); | 4471 _mov(Dest, T); |
| 4570 } | 4472 } |
| 4571 | 4473 |
| 4572 void TargetX8632::lowerStore(const InstStore *Inst) { | 4474 template <class Machine> |
| 4475 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | |
| 4573 Operand *Value = Inst->getData(); | 4476 Operand *Value = Inst->getData(); |
| 4574 Operand *Addr = Inst->getAddr(); | 4477 Operand *Addr = Inst->getAddr(); |
| 4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | 4478 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); |
| 4576 Type Ty = NewAddr->getType(); | 4479 Type Ty = NewAddr->getType(); |
| 4577 | 4480 |
| 4578 if (Ty == IceType_i64) { | 4481 if (Ty == IceType_i64) { |
| 4579 Value = legalize(Value); | 4482 Value = legalize(Value); |
| 4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4483 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
| 4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4484 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
| 4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 4485 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
| 4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 4486 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
| 4584 } else if (isVectorType(Ty)) { | 4487 } else if (isVectorType(Ty)) { |
| 4585 _storep(legalizeToVar(Value), NewAddr); | 4488 _storep(legalizeToVar(Value), NewAddr); |
| 4586 } else { | 4489 } else { |
| 4587 Value = legalize(Value, Legal_Reg | Legal_Imm); | 4490 Value = legalize(Value, Legal_Reg | Legal_Imm); |
| 4588 _store(Value, NewAddr); | 4491 _store(Value, NewAddr); |
| 4589 } | 4492 } |
| 4590 } | 4493 } |
| 4591 | 4494 |
| 4592 void TargetX8632::doAddressOptStore() { | 4495 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| 4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | 4496 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); |
| 4594 Operand *Data = Inst->getData(); | 4497 Operand *Data = Inst->getData(); |
| 4595 Operand *Addr = Inst->getAddr(); | 4498 Operand *Addr = Inst->getAddr(); |
| 4596 Variable *Index = nullptr; | 4499 Variable *Index = nullptr; |
| 4597 uint16_t Shift = 0; | 4500 uint16_t Shift = 0; |
| 4598 int32_t Offset = 0; // TODO: make Constant | 4501 int32_t Offset = 0; // TODO: make Constant |
| 4599 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4502 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 4600 // Vanilla ICE store instructions should not use the segment registers, | 4503 // Vanilla ICE store instructions should not use the segment registers, |
| 4601 // and computeAddressOpt only works at the level of Variables and Constants, | 4504 // and computeAddressOpt only works at the level of Variables and Constants, |
| 4602 // not other OperandX8632Mem, so there should be no mention of segment | 4505 // not other OperandX8632Mem, so there should be no mention of segment |
| 4603 // registers there either. | 4506 // registers there either. |
| 4604 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4507 const OperandX8632Mem::SegmentRegisters SegmentReg = |
| 4605 OperandX8632Mem::DefaultSegment; | 4508 OperandX8632Mem::DefaultSegment; |
| 4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4509 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| 4607 if (Base && Addr != Base) { | 4510 if (Base && Addr != Base) { |
| 4608 Inst->setDeleted(); | 4511 Inst->setDeleted(); |
| 4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | 4512 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
| 4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4513 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 4611 Shift, SegmentReg); | 4514 Shift, SegmentReg); |
| 4612 InstStore *NewStore = InstStore::create(Func, Data, Addr); | 4515 InstStore *NewStore = InstStore::create(Func, Data, Addr); |
| 4613 if (Inst->getDest()) | 4516 if (Inst->getDest()) |
| 4614 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 4517 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 4615 Context.insert(NewStore); | 4518 Context.insert(NewStore); |
| 4616 } | 4519 } |
| 4617 } | 4520 } |
| 4618 | 4521 |
| 4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4522 template <class Machine> |
| 4523 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | |
| 4620 // This implements the most naive possible lowering. | 4524 // This implements the most naive possible lowering. |
| 4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4525 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
| 4622 Operand *Src0 = Inst->getComparison(); | 4526 Operand *Src0 = Inst->getComparison(); |
| 4623 SizeT NumCases = Inst->getNumCases(); | 4527 SizeT NumCases = Inst->getNumCases(); |
| 4624 if (Src0->getType() == IceType_i64) { | 4528 if (Src0->getType() == IceType_i64) { |
| 4625 Src0 = legalize(Src0); // get Base/Index into physical registers | 4529 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4626 Operand *Src0Lo = loOperand(Src0); | 4530 Operand *Src0Lo = loOperand(Src0); |
| 4627 Operand *Src0Hi = hiOperand(Src0); | 4531 Operand *Src0Hi = hiOperand(Src0); |
| 4628 if (NumCases >= 2) { | 4532 if (NumCases >= 2) { |
| 4629 Src0Lo = legalizeToVar(Src0Lo); | 4533 Src0Lo = legalizeToVar(Src0Lo); |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | 4557 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); |
| 4654 for (SizeT I = 0; I < NumCases; ++I) { | 4558 for (SizeT I = 0; I < NumCases; ++I) { |
| 4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | 4559 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); |
| 4656 _cmp(Src0, Value); | 4560 _cmp(Src0, Value); |
| 4657 _br(CondX86::Br_e, Inst->getLabel(I)); | 4561 _br(CondX86::Br_e, Inst->getLabel(I)); |
| 4658 } | 4562 } |
| 4659 | 4563 |
| 4660 _br(Inst->getLabelDefault()); | 4564 _br(Inst->getLabelDefault()); |
| 4661 } | 4565 } |
| 4662 | 4566 |
| 4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4567 template <class Machine> |
| 4664 Variable *Dest, Operand *Src0, | 4568 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4665 Operand *Src1) { | 4569 Variable *Dest, Operand *Src0, |
| 4570 Operand *Src1) { | |
| 4666 assert(isVectorType(Dest->getType())); | 4571 assert(isVectorType(Dest->getType())); |
| 4667 Type Ty = Dest->getType(); | 4572 Type Ty = Dest->getType(); |
| 4668 Type ElementTy = typeElementType(Ty); | 4573 Type ElementTy = typeElementType(Ty); |
| 4669 SizeT NumElements = typeNumElements(Ty); | 4574 SizeT NumElements = typeNumElements(Ty); |
| 4670 | 4575 |
| 4671 Operand *T = Ctx->getConstantUndef(Ty); | 4576 Operand *T = Ctx->getConstantUndef(Ty); |
| 4672 for (SizeT I = 0; I < NumElements; ++I) { | 4577 for (SizeT I = 0; I < NumElements; ++I) { |
| 4673 Constant *Index = Ctx->getConstantInt32(I); | 4578 Constant *Index = Ctx->getConstantInt32(I); |
| 4674 | 4579 |
| 4675 // Extract the next two inputs. | 4580 // Extract the next two inputs. |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 4692 } | 4597 } |
| 4693 | 4598 |
| 4694 // The following pattern occurs often in lowered C and C++ code: | 4599 // The following pattern occurs often in lowered C and C++ code: |
| 4695 // | 4600 // |
| 4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4601 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4602 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4698 // | 4603 // |
| 4699 // We can eliminate the sext operation by copying the result of pcmpeqd, | 4604 // We can eliminate the sext operation by copying the result of pcmpeqd, |
| 4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 4605 // pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 4701 // of the sext operation. | 4606 // of the sext operation. |
| 4702 void TargetX8632::eliminateNextVectorSextInstruction( | 4607 template <class Machine> |
| 4608 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | |
| 4703 Variable *SignExtendedResult) { | 4609 Variable *SignExtendedResult) { |
| 4704 if (InstCast *NextCast = | 4610 if (InstCast *NextCast = |
| 4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4611 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 4706 if (NextCast->getCastKind() == InstCast::Sext && | 4612 if (NextCast->getCastKind() == InstCast::Sext && |
| 4707 NextCast->getSrc(0) == SignExtendedResult) { | 4613 NextCast->getSrc(0) == SignExtendedResult) { |
| 4708 NextCast->setDeleted(); | 4614 NextCast->setDeleted(); |
| 4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4615 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4710 // Skip over the instruction. | 4616 // Skip over the instruction. |
| 4711 Context.advanceNext(); | 4617 Context.advanceNext(); |
| 4712 } | 4618 } |
| 4713 } | 4619 } |
| 4714 } | 4620 } |
| 4715 | 4621 |
| 4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | 4622 template <class Machine> |
| 4623 void TargetX86Base<Machine>::lowerUnreachable( | |
| 4624 const InstUnreachable * /*Inst*/) { | |
| 4625 _ud2(); | |
| 4626 } | |
| 4717 | 4627 |
| 4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { | 4628 template <class Machine> |
| 4629 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) { | |
| 4719 // If the beacon variable's live range does not end in this | 4630 // If the beacon variable's live range does not end in this |
| 4720 // instruction, then it must end in the modified Store instruction | 4631 // instruction, then it must end in the modified Store instruction |
| 4721 // that follows. This means that the original Store instruction is | 4632 // that follows. This means that the original Store instruction is |
| 4722 // still there, either because the value being stored is used beyond | 4633 // still there, either because the value being stored is used beyond |
| 4723 // the Store instruction, or because dead code elimination did not | 4634 // the Store instruction, or because dead code elimination did not |
| 4724 // happen. In either case, we cancel RMW lowering (and the caller | 4635 // happen. In either case, we cancel RMW lowering (and the caller |
| 4725 // deletes the RMW instruction). | 4636 // deletes the RMW instruction). |
| 4726 if (!RMW->isLastUse(RMW->getBeacon())) | 4637 if (!RMW->isLastUse(RMW->getBeacon())) |
| 4727 return; | 4638 return; |
| 4728 Operand *Src = RMW->getData(); | 4639 Operand *Src = RMW->getData(); |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4782 return; | 4693 return; |
| 4783 case InstArithmetic::Xor: | 4694 case InstArithmetic::Xor: |
| 4784 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4695 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 4785 _xor_rmw(Addr, Src); | 4696 _xor_rmw(Addr, Src); |
| 4786 return; | 4697 return; |
| 4787 } | 4698 } |
| 4788 } | 4699 } |
| 4789 llvm::report_fatal_error("Couldn't lower RMW instruction"); | 4700 llvm::report_fatal_error("Couldn't lower RMW instruction"); |
| 4790 } | 4701 } |
| 4791 | 4702 |
| 4792 void TargetX8632::lowerOther(const Inst *Instr) { | 4703 template <class Machine> |
| 4704 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | |
| 4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | 4705 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4794 lowerRMW(RMW); | 4706 lowerRMW(RMW); |
| 4795 } else { | 4707 } else { |
| 4796 TargetLowering::lowerOther(Instr); | 4708 TargetLowering::lowerOther(Instr); |
| 4797 } | 4709 } |
| 4798 } | 4710 } |
| 4799 | 4711 |
| 4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4712 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4801 // preserve integrity of liveness analysis. Undef values are also | 4713 // preserve integrity of liveness analysis. Undef values are also |
| 4802 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4714 // turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4803 // Undef input. | 4715 // Undef input. |
| 4804 void TargetX8632::prelowerPhis() { | 4716 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4805 // Pause constant blinding or pooling, blinding or pooling will be done later | 4717 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 4806 // during phi lowering assignments | 4718 // during phi lowering assignments |
| 4807 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4719 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4808 | 4720 |
| 4809 CfgNode *Node = Context.getNode(); | 4721 CfgNode *Node = Context.getNode(); |
| 4810 for (Inst &I : Node->getPhis()) { | 4722 for (Inst &I : Node->getPhis()) { |
| 4811 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4723 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4812 if (Phi->isDeleted()) | 4724 if (Phi->isDeleted()) |
| 4813 continue; | 4725 continue; |
| 4814 Variable *Dest = Phi->getDest(); | 4726 Variable *Dest = Phi->getDest(); |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 4825 PhiLo->addArgument(loOperand(Src), Label); | 4737 PhiLo->addArgument(loOperand(Src), Label); |
| 4826 PhiHi->addArgument(hiOperand(Src), Label); | 4738 PhiHi->addArgument(hiOperand(Src), Label); |
| 4827 } | 4739 } |
| 4828 Node->getPhis().push_back(PhiLo); | 4740 Node->getPhis().push_back(PhiLo); |
| 4829 Node->getPhis().push_back(PhiHi); | 4741 Node->getPhis().push_back(PhiHi); |
| 4830 Phi->setDeleted(); | 4742 Phi->setDeleted(); |
| 4831 } | 4743 } |
| 4832 } | 4744 } |
| 4833 } | 4745 } |
| 4834 | 4746 |
| 4835 namespace { | |
| 4836 | |
| 4837 bool isMemoryOperand(const Operand *Opnd) { | 4747 bool isMemoryOperand(const Operand *Opnd) { |
| 4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) | 4748 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) |
| 4839 return !Var->hasReg(); | 4749 return !Var->hasReg(); |
| 4840 // We treat vector undef values the same as a memory operand, | 4750 // We treat vector undef values the same as a memory operand, |
| 4841 // because they do in fact need a register to materialize the vector | 4751 // because they do in fact need a register to materialize the vector |
| 4842 // of zeroes into. | 4752 // of zeroes into. |
| 4843 if (llvm::isa<ConstantUndef>(Opnd)) | 4753 if (llvm::isa<ConstantUndef>(Opnd)) |
| 4844 return isScalarFloatingType(Opnd->getType()) || | 4754 return isScalarFloatingType(Opnd->getType()) || |
| 4845 isVectorType(Opnd->getType()); | 4755 isVectorType(Opnd->getType()); |
| 4846 if (llvm::isa<Constant>(Opnd)) | 4756 if (llvm::isa<Constant>(Opnd)) |
| 4847 return isScalarFloatingType(Opnd->getType()); | 4757 return isScalarFloatingType(Opnd->getType()); |
| 4848 return true; | 4758 return true; |
| 4849 } | 4759 } |
| 4850 | 4760 |
| 4851 } // end of anonymous namespace | |
| 4852 | |
| 4853 // Lower the pre-ordered list of assignments into mov instructions. | 4761 // Lower the pre-ordered list of assignments into mov instructions. |
| 4854 // Also has to do some ad-hoc register allocation as necessary. | 4762 // Also has to do some ad-hoc register allocation as necessary. |
| 4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node, | 4763 template <class Machine> |
| 4856 const AssignList &Assignments) { | 4764 void TargetX86Base<Machine>::lowerPhiAssignments( |
| 4765 CfgNode *Node, const AssignList &Assignments) { | |
| 4857 // Check that this is a properly initialized shell of a node. | 4766 // Check that this is a properly initialized shell of a node. |
| 4858 assert(Node->getOutEdges().size() == 1); | 4767 assert(Node->getOutEdges().size() == 1); |
| 4859 assert(Node->getInsts().empty()); | 4768 assert(Node->getInsts().empty()); |
| 4860 assert(Node->getPhis().empty()); | 4769 assert(Node->getPhis().empty()); |
| 4861 CfgNode *Succ = Node->getOutEdges().front(); | 4770 CfgNode *Succ = Node->getOutEdges().front(); |
| 4862 getContext().init(Node); | 4771 getContext().init(Node); |
| 4863 // Register set setup similar to regAlloc(). | 4772 // Register set setup similar to regAlloc(). |
| 4864 RegSetMask RegInclude = RegSet_All; | 4773 RegSetMask RegInclude = RegSet_All; |
| 4865 RegSetMask RegExclude = RegSet_StackPointer; | 4774 RegSetMask RegExclude = RegSet_StackPointer; |
| 4866 if (hasFramePointer()) | 4775 if (hasFramePointer()) |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4997 _br(Succ); | 4906 _br(Succ); |
| 4998 } | 4907 } |
| 4999 | 4908 |
| 5000 // There is no support for loading or emitting vector constants, so the | 4909 // There is no support for loading or emitting vector constants, so the |
| 5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 4910 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| 5002 // etc. are initialized with register operations. | 4911 // etc. are initialized with register operations. |
| 5003 // | 4912 // |
| 5004 // TODO(wala): Add limited support for vector constants so that | 4913 // TODO(wala): Add limited support for vector constants so that |
| 5005 // complex initialization in registers is unnecessary. | 4914 // complex initialization in registers is unnecessary. |
| 5006 | 4915 |
| 5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 4916 template <class Machine> |
| 4917 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | |
| 5008 Variable *Reg = makeReg(Ty, RegNum); | 4918 Variable *Reg = makeReg(Ty, RegNum); |
| 5009 // Insert a FakeDef, since otherwise the live range of Reg might | 4919 // Insert a FakeDef, since otherwise the live range of Reg might |
| 5010 // be overestimated. | 4920 // be overestimated. |
| 5011 Context.insert(InstFakeDef::create(Func, Reg)); | 4921 Context.insert(InstFakeDef::create(Func, Reg)); |
| 5012 _pxor(Reg, Reg); | 4922 _pxor(Reg, Reg); |
| 5013 return Reg; | 4923 return Reg; |
| 5014 } | 4924 } |
| 5015 | 4925 |
| 5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | 4926 template <class Machine> |
| 4927 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | |
| 4928 int32_t RegNum) { | |
| 5017 Variable *MinusOnes = makeReg(Ty, RegNum); | 4929 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 4930 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5019 Context.insert(InstFakeDef::create(Func, MinusOnes)); | 4931 Context.insert(InstFakeDef::create(Func, MinusOnes)); |
| 5020 _pcmpeq(MinusOnes, MinusOnes); | 4932 _pcmpeq(MinusOnes, MinusOnes); |
| 5021 return MinusOnes; | 4933 return MinusOnes; |
| 5022 } | 4934 } |
| 5023 | 4935 |
| 5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 4936 template <class Machine> |
| 4937 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | |
| 5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 4938 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 4939 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5027 _psub(Dest, MinusOne); | 4940 _psub(Dest, MinusOne); |
| 5028 return Dest; | 4941 return Dest; |
| 5029 } | 4942 } |
| 5030 | 4943 |
| 5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | 4944 template <class Machine> |
| 4945 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, | |
| 4946 int32_t RegNum) { | |
| 5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 4947 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 5033 Ty == IceType_v16i8); | 4948 Ty == IceType_v16i8); |
| 5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 4949 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 4950 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | 4951 SizeT Shift = |
| 4952 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | |
| 5037 _psll(Reg, Ctx->getConstantInt8(Shift)); | 4953 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 5038 return Reg; | 4954 return Reg; |
| 5039 } else { | 4955 } else { |
| 5040 // SSE has no left shift operation for vectors of 8 bit integers. | 4956 // SSE has no left shift operation for vectors of 8 bit integers. |
| 5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4957 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4958 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 5043 Variable *Reg = makeReg(Ty, RegNum); | 4959 Variable *Reg = makeReg(Ty, RegNum); |
| 5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4960 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4961 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 5046 return Reg; | 4962 return Reg; |
| 5047 } | 4963 } |
| 5048 } | 4964 } |
| 5049 | 4965 |
| 5050 // Construct a mask in a register that can be and'ed with a | 4966 // Construct a mask in a register that can be and'ed with a |
| 5051 // floating-point value to mask off its sign bit. The value will be | 4967 // floating-point value to mask off its sign bit. The value will be |
| 5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4968 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| 5053 // for f64. Construct it as vector of ones logically right shifted | 4969 // for f64. Construct it as vector of ones logically right shifted |
| 5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4970 // one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| 5055 // vector constants in memory. | 4971 // vector constants in memory. |
| 5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { | 4972 template <class Machine> |
| 4973 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | |
| 4974 int32_t RegNum) { | |
| 5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4975 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 5058 _psrl(Reg, Ctx->getConstantInt8(1)); | 4976 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 5059 return Reg; | 4977 return Reg; |
| 5060 } | 4978 } |
| 5061 | 4979 |
| 5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 4980 template <class Machine> |
| 5063 Variable *Slot, | 4981 OperandX8632Mem * |
| 5064 uint32_t Offset) { | 4982 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 4983 uint32_t Offset) { | |
| 5065 // Ensure that Loc is a stack slot. | 4984 // Ensure that Loc is a stack slot. |
| 5066 assert(Slot->getWeight().isZero()); | 4985 assert(Slot->getWeight().isZero()); |
| 5067 assert(Slot->getRegNum() == Variable::NoRegister); | 4986 assert(Slot->getRegNum() == Variable::NoRegister); |
| 5068 // Compute the location of Loc in memory. | 4987 // Compute the location of Loc in memory. |
| 5069 // TODO(wala,stichnot): lea should not be required. The address of | 4988 // TODO(wala,stichnot): lea should not be required. The address of |
| 5070 // the stack slot is known at compile time (although not until after | 4989 // the stack slot is known at compile time (although not until after |
| 5071 // addProlog()). | 4990 // addProlog()). |
| 5072 const Type PointerType = IceType_i32; | 4991 const Type PointerType = IceType_i32; |
| 5073 Variable *Loc = makeReg(PointerType); | 4992 Variable *Loc = makeReg(PointerType); |
| 5074 _lea(Loc, Slot); | 4993 _lea(Loc, Slot); |
| 5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 4994 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 4995 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| 5077 } | 4996 } |
| 5078 | 4997 |
| 5079 // Helper for legalize() to emit the right code to lower an operand to a | 4998 // Helper for legalize() to emit the right code to lower an operand to a |
| 5080 // register of the appropriate type. | 4999 // register of the appropriate type. |
| 5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | 5000 template <class Machine> |
| 5001 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | |
| 5082 Type Ty = Src->getType(); | 5002 Type Ty = Src->getType(); |
| 5083 Variable *Reg = makeReg(Ty, RegNum); | 5003 Variable *Reg = makeReg(Ty, RegNum); |
| 5084 if (isVectorType(Ty)) { | 5004 if (isVectorType(Ty)) { |
| 5085 _movp(Reg, Src); | 5005 _movp(Reg, Src); |
| 5086 } else { | 5006 } else { |
| 5087 _mov(Reg, Src); | 5007 _mov(Reg, Src); |
| 5088 } | 5008 } |
| 5089 return Reg; | 5009 return Reg; |
| 5090 } | 5010 } |
| 5091 | 5011 |
| 5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, | 5012 template <class Machine> |
| 5093 int32_t RegNum) { | 5013 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| 5014 int32_t RegNum) { | |
| 5094 Type Ty = From->getType(); | 5015 Type Ty = From->getType(); |
| 5095 // Assert that a physical register is allowed. To date, all calls | 5016 // Assert that a physical register is allowed. To date, all calls |
| 5096 // to legalize() allow a physical register. If a physical register | 5017 // to legalize() allow a physical register. If a physical register |
| 5097 // needs to be explicitly disallowed, then new code will need to be | 5018 // needs to be explicitly disallowed, then new code will need to be |
| 5098 // written to force a spill. | 5019 // written to force a spill. |
| 5099 assert(Allowed & Legal_Reg); | 5020 assert(Allowed & Legal_Reg); |
| 5100 // If we're asking for a specific physical register, make sure we're | 5021 // If we're asking for a specific physical register, make sure we're |
| 5101 // not allowing any other operand kinds. (This could be future | 5022 // not allowing any other operand kinds. (This could be future |
| 5102 // work, e.g. allow the shl shift amount to be either an immediate | 5023 // work, e.g. allow the shl shift amount to be either an immediate |
| 5103 // or in ecx.) | 5024 // or in ecx.) |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5117 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| 5197 From = copyToReg(From, RegNum); | 5118 From = copyToReg(From, RegNum); |
| 5198 } | 5119 } |
| 5199 return From; | 5120 return From; |
| 5200 } | 5121 } |
| 5201 llvm_unreachable("Unhandled operand kind in legalize()"); | 5122 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 5202 return From; | 5123 return From; |
| 5203 } | 5124 } |
| 5204 | 5125 |
| 5205 // Provide a trivial wrapper to legalize() for this common usage. | 5126 // Provide a trivial wrapper to legalize() for this common usage. |
| 5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { | 5127 template <class Machine> |
| 5128 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { | |
| 5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5129 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 5208 } | 5130 } |
| 5209 | 5131 |
| 5210 // For the cmp instruction, if Src1 is an immediate, or known to be a | 5132 // For the cmp instruction, if Src1 is an immediate, or known to be a |
| 5211 // physical register, we can allow Src0 to be a memory operand. | 5133 // physical register, we can allow Src0 to be a memory operand. |
| 5212 // Otherwise, Src0 must be copied into a physical register. | 5134 // Otherwise, Src0 must be copied into a physical register. |
| 5213 // (Actually, either Src0 or Src1 can be chosen for the physical | 5135 // (Actually, either Src0 or Src1 can be chosen for the physical |
| 5214 // register, but unfortunately we have to commit to one or the other | 5136 // register, but unfortunately we have to commit to one or the other |
| 5215 // before register allocation.) | 5137 // before register allocation.) |
| 5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { | 5138 template <class Machine> |
| 5139 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | |
| 5140 Operand *Src1) { | |
| 5217 bool IsSrc1ImmOrReg = false; | 5141 bool IsSrc1ImmOrReg = false; |
| 5218 if (llvm::isa<Constant>(Src1)) { | 5142 if (llvm::isa<Constant>(Src1)) { |
| 5219 IsSrc1ImmOrReg = true; | 5143 IsSrc1ImmOrReg = true; |
| 5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 5144 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 5221 if (Var->hasReg()) | 5145 if (Var->hasReg()) |
| 5222 IsSrc1ImmOrReg = true; | 5146 IsSrc1ImmOrReg = true; |
| 5223 } | 5147 } |
| 5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 5148 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| 5225 } | 5149 } |
| 5226 | 5150 |
| 5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty, | 5151 template <class Machine> |
| 5228 bool DoLegalize) { | 5152 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, |
| 5153 Type Ty, | |
| 5154 bool DoLegalize) { | |
| 5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); | 5155 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); |
| 5230 // It may be the case that address mode optimization already creates | 5156 // It may be the case that address mode optimization already creates |
| 5231 // an OperandX8632Mem, so in that case it wouldn't need another level | 5157 // an OperandX8632Mem, so in that case it wouldn't need another level |
| 5232 // of transformation. | 5158 // of transformation. |
| 5233 if (!Mem) { | 5159 if (!Mem) { |
| 5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd); | 5160 Variable *Base = llvm::dyn_cast<Variable>(Opnd); |
| 5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); | 5161 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); |
| 5236 assert(Base || Offset); | 5162 assert(Base || Offset); |
| 5237 if (Offset) { | 5163 if (Offset) { |
| 5238 // During memory operand building, we do not blind or pool | 5164 // During memory operand building, we do not blind or pool |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 5250 llvm::isa<ConstantRelocatable>(Offset)); | 5176 llvm::isa<ConstantRelocatable>(Offset)); |
| 5251 } | 5177 } |
| 5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | 5178 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); |
| 5253 } | 5179 } |
| 5254 // Do legalization, which contains randomization/pooling | 5180 // Do legalization, which contains randomization/pooling |
| 5255 // or do randomization/pooling. | 5181 // or do randomization/pooling. |
| 5256 return llvm::cast<OperandX8632Mem>( | 5182 return llvm::cast<OperandX8632Mem>( |
| 5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 5183 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
| 5258 } | 5184 } |
| 5259 | 5185 |
| 5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 5186 template <class Machine> |
| 5187 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | |
| 5261 // There aren't any 64-bit integer registers for x86-32. | 5188 // There aren't any 64-bit integer registers for x86-32. |
| 5262 assert(Type != IceType_i64); | 5189 assert(Type != IceType_i64); |
| 5263 Variable *Reg = Func->makeVariable(Type); | 5190 Variable *Reg = Func->makeVariable(Type); |
| 5264 if (RegNum == Variable::NoRegister) | 5191 if (RegNum == Variable::NoRegister) |
| 5265 Reg->setWeightInfinite(); | 5192 Reg->setWeightInfinite(); |
| 5266 else | 5193 else |
| 5267 Reg->setRegNum(RegNum); | 5194 Reg->setRegNum(RegNum); |
| 5268 return Reg; | 5195 return Reg; |
| 5269 } | 5196 } |
| 5270 | 5197 |
| 5271 void TargetX8632::postLower() { | 5198 template <class Machine> void TargetX86Base<Machine>::postLower() { |
| 5272 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 5199 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 5273 return; | 5200 return; |
| 5274 inferTwoAddress(); | 5201 inferTwoAddress(); |
| 5275 } | 5202 } |
| 5276 | 5203 |
| 5277 void TargetX8632::makeRandomRegisterPermutation( | 5204 template <class Machine> |
| 5205 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | |
| 5278 llvm::SmallVectorImpl<int32_t> &Permutation, | 5206 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 5279 const llvm::SmallBitVector &ExcludeRegisters) const { | 5207 const llvm::SmallBitVector &ExcludeRegisters) const { |
| 5280 // TODO(stichnot): Declaring Permutation this way loses type/size | 5208 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 5281 // information. Fix this in conjunction with the caller-side TODO. | 5209 // information. Fix this in conjunction with the caller-side TODO. |
| 5282 assert(Permutation.size() >= RegX8632::Reg_NUM); | 5210 assert(Permutation.size() >= RegX8632::Reg_NUM); |
| 5283 // Expected upper bound on the number of registers in a single | 5211 // Expected upper bound on the number of registers in a single |
| 5284 // equivalence class. For x86-32, this would comprise the 8 XMM | 5212 // equivalence class. For x86-32, this would comprise the 8 XMM |
| 5285 // registers. This is for performance, not correctness. | 5213 // registers. This is for performance, not correctness. |
| 5286 static const unsigned MaxEquivalenceClassSize = 8; | 5214 static const unsigned MaxEquivalenceClassSize = 8; |
| 5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; | 5215 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5334 if (!First) | 5262 if (!First) |
| 5335 Str << " "; | 5263 Str << " "; |
| 5336 First = false; | 5264 First = false; |
| 5337 Str << getRegName(Register, IceType_i32); | 5265 Str << getRegName(Register, IceType_i32); |
| 5338 } | 5266 } |
| 5339 Str << "}\n"; | 5267 Str << "}\n"; |
| 5340 } | 5268 } |
| 5341 } | 5269 } |
| 5342 } | 5270 } |
| 5343 | 5271 |
| 5344 void TargetX8632::emit(const ConstantInteger32 *C) const { | 5272 template <class Machine> |
| 5273 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | |
| 5345 if (!ALLOW_DUMP) | 5274 if (!ALLOW_DUMP) |
| 5346 return; | 5275 return; |
| 5347 Ostream &Str = Ctx->getStrEmit(); | 5276 Ostream &Str = Ctx->getStrEmit(); |
| 5348 Str << getConstantPrefix() << C->getValue(); | 5277 Str << getConstantPrefix() << C->getValue(); |
| 5349 } | 5278 } |
| 5350 | 5279 |
| 5351 void TargetX8632::emit(const ConstantInteger64 *) const { | 5280 template <class Machine> |
| 5281 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { | |
| 5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 5282 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); |
| 5353 } | 5283 } |
| 5354 | 5284 |
| 5355 void TargetX8632::emit(const ConstantFloat *C) const { | 5285 template <class Machine> |
| 5286 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | |
| 5356 if (!ALLOW_DUMP) | 5287 if (!ALLOW_DUMP) |
| 5357 return; | 5288 return; |
| 5358 Ostream &Str = Ctx->getStrEmit(); | 5289 Ostream &Str = Ctx->getStrEmit(); |
| 5359 C->emitPoolLabel(Str); | 5290 C->emitPoolLabel(Str); |
| 5360 } | 5291 } |
| 5361 | 5292 |
| 5362 void TargetX8632::emit(const ConstantDouble *C) const { | 5293 template <class Machine> |
| 5294 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const { | |
| 5363 if (!ALLOW_DUMP) | 5295 if (!ALLOW_DUMP) |
| 5364 return; | 5296 return; |
| 5365 Ostream &Str = Ctx->getStrEmit(); | 5297 Ostream &Str = Ctx->getStrEmit(); |
| 5366 C->emitPoolLabel(Str); | 5298 C->emitPoolLabel(Str); |
| 5367 } | 5299 } |
| 5368 | 5300 |
| 5369 void TargetX8632::emit(const ConstantUndef *) const { | 5301 template <class Machine> |
| 5302 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | |
| 5370 llvm::report_fatal_error("undef value encountered by emitter."); | 5303 llvm::report_fatal_error("undef value encountered by emitter."); |
| 5371 } | 5304 } |
| 5372 | 5305 |
| 5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) | |
| 5374 : TargetDataLowering(Ctx) {} | |
| 5375 | |
| 5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, | |
| 5377 const IceString &SectionSuffix) { | |
| 5378 switch (Ctx->getFlags().getOutFileType()) { | |
| 5379 case FT_Elf: { | |
| 5380 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | |
| 5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); | |
| 5382 } break; | |
| 5383 case FT_Asm: | |
| 5384 case FT_Iasm: { | |
| 5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); | |
| 5386 OstreamLocker L(Ctx); | |
| 5387 for (const VariableDeclaration *Var : Vars) { | |
| 5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | |
| 5389 emitGlobal(*Var, SectionSuffix); | |
| 5390 } | |
| 5391 } | |
| 5392 } break; | |
| 5393 } | |
| 5394 } | |
| 5395 | |
| 5396 template <typename T> struct PoolTypeConverter {}; | |
| 5397 | |
| 5398 template <> struct PoolTypeConverter<float> { | |
| 5399 typedef uint32_t PrimitiveIntType; | |
| 5400 typedef ConstantFloat IceType; | |
| 5401 static const Type Ty = IceType_f32; | |
| 5402 static const char *TypeName; | |
| 5403 static const char *AsmTag; | |
| 5404 static const char *PrintfString; | |
| 5405 }; | |
| 5406 const char *PoolTypeConverter<float>::TypeName = "float"; | |
| 5407 const char *PoolTypeConverter<float>::AsmTag = ".long"; | |
| 5408 const char *PoolTypeConverter<float>::PrintfString = "0x%x"; | |
| 5409 | |
| 5410 template <> struct PoolTypeConverter<double> { | |
| 5411 typedef uint64_t PrimitiveIntType; | |
| 5412 typedef ConstantDouble IceType; | |
| 5413 static const Type Ty = IceType_f64; | |
| 5414 static const char *TypeName; | |
| 5415 static const char *AsmTag; | |
| 5416 static const char *PrintfString; | |
| 5417 }; | |
| 5418 const char *PoolTypeConverter<double>::TypeName = "double"; | |
| 5419 const char *PoolTypeConverter<double>::AsmTag = ".quad"; | |
| 5420 const char *PoolTypeConverter<double>::PrintfString = "0x%llx"; | |
| 5421 | |
| 5422 // Add converter for int type constant pooling | |
| 5423 template <> struct PoolTypeConverter<uint32_t> { | |
| 5424 typedef uint32_t PrimitiveIntType; | |
| 5425 typedef ConstantInteger32 IceType; | |
| 5426 static const Type Ty = IceType_i32; | |
| 5427 static const char *TypeName; | |
| 5428 static const char *AsmTag; | |
| 5429 static const char *PrintfString; | |
| 5430 }; | |
| 5431 const char *PoolTypeConverter<uint32_t>::TypeName = "i32"; | |
| 5432 const char *PoolTypeConverter<uint32_t>::AsmTag = ".long"; | |
| 5433 const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x"; | |
| 5434 | |
| 5435 // Add converter for int type constant pooling | |
| 5436 template <> struct PoolTypeConverter<uint16_t> { | |
| 5437 typedef uint32_t PrimitiveIntType; | |
| 5438 typedef ConstantInteger32 IceType; | |
| 5439 static const Type Ty = IceType_i16; | |
| 5440 static const char *TypeName; | |
| 5441 static const char *AsmTag; | |
| 5442 static const char *PrintfString; | |
| 5443 }; | |
| 5444 const char *PoolTypeConverter<uint16_t>::TypeName = "i16"; | |
| 5445 const char *PoolTypeConverter<uint16_t>::AsmTag = ".short"; | |
| 5446 const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x"; | |
| 5447 | |
| 5448 // Add converter for int type constant pooling | |
| 5449 template <> struct PoolTypeConverter<uint8_t> { | |
| 5450 typedef uint32_t PrimitiveIntType; | |
| 5451 typedef ConstantInteger32 IceType; | |
| 5452 static const Type Ty = IceType_i8; | |
| 5453 static const char *TypeName; | |
| 5454 static const char *AsmTag; | |
| 5455 static const char *PrintfString; | |
| 5456 }; | |
| 5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; | |
| 5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; | |
| 5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; | |
| 5460 | |
| 5461 template <typename T> | |
| 5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { | |
| 5463 if (!ALLOW_DUMP) | |
| 5464 return; | |
| 5465 Ostream &Str = Ctx->getStrEmit(); | |
| 5466 Type Ty = T::Ty; | |
| 5467 SizeT Align = typeAlignInBytes(Ty); | |
| 5468 ConstantList Pool = Ctx->getConstantPool(Ty); | |
| 5469 | |
| 5470 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align | |
| 5471 << "\n"; | |
| 5472 Str << "\t.align\t" << Align << "\n"; | |
| 5473 for (Constant *C : Pool) { | |
| 5474 if (!C->getShouldBePooled()) | |
| 5475 continue; | |
| 5476 typename T::IceType *Const = llvm::cast<typename T::IceType>(C); | |
| 5477 typename T::IceType::PrimType Value = Const->getValue(); | |
| 5478 // Use memcpy() to copy bits from Value into RawValue in a way | |
| 5479 // that avoids breaking strict-aliasing rules. | |
| 5480 typename T::PrimitiveIntType RawValue; | |
| 5481 memcpy(&RawValue, &Value, sizeof(Value)); | |
| 5482 char buf[30]; | |
| 5483 int CharsPrinted = | |
| 5484 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue); | |
| 5485 assert(CharsPrinted >= 0 && | |
| 5486 (size_t)CharsPrinted < llvm::array_lengthof(buf)); | |
| 5487 (void)CharsPrinted; // avoid warnings if asserts are disabled | |
| 5488 Const->emitPoolLabel(Str); | |
| 5489 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " " | |
| 5490 << Value << "\n"; | |
| 5491 } | |
| 5492 } | |
| 5493 | |
| 5494 void TargetDataX8632::lowerConstants() { | |
| 5495 if (Ctx->getFlags().getDisableTranslation()) | |
| 5496 return; | |
| 5497 // No need to emit constants from the int pool since (for x86) they | |
| 5498 // are embedded as immediates in the instructions, just emit float/double. | |
| 5499 switch (Ctx->getFlags().getOutFileType()) { | |
| 5500 case FT_Elf: { | |
| 5501 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | |
| 5502 | |
| 5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8); | |
| 5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16); | |
| 5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32); | |
| 5506 | |
| 5507 Writer->writeConstantPool<ConstantFloat>(IceType_f32); | |
| 5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64); | |
| 5509 } break; | |
| 5510 case FT_Asm: | |
| 5511 case FT_Iasm: { | |
| 5512 OstreamLocker L(Ctx); | |
| 5513 | |
| 5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx); | |
| 5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx); | |
| 5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx); | |
| 5517 | |
| 5518 emitConstantPool<PoolTypeConverter<float>>(Ctx); | |
| 5519 emitConstantPool<PoolTypeConverter<double>>(Ctx); | |
| 5520 } break; | |
| 5521 } | |
| 5522 } | |
| 5523 | |
| 5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | |
| 5525 : TargetHeaderLowering(Ctx) {} | |
| 5526 | |
| 5527 // Randomize or pool an Immediate. | 5306 // Randomize or pool an Immediate. |
| 5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate, | 5307 template <class Machine> |
| 5529 int32_t RegNum) { | 5308 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| 5309 int32_t RegNum) { | |
| 5530 assert(llvm::isa<ConstantInteger32>(Immediate) || | 5310 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 5531 llvm::isa<ConstantRelocatable>(Immediate)); | 5311 llvm::isa<ConstantRelocatable>(Immediate)); |
| 5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5312 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5533 RandomizationPoolingPaused == true) { | 5313 RandomizationPoolingPaused == true) { |
| 5534 // Immediates randomization/pooling off or paused | 5314 // Immediates randomization/pooling off or paused |
| 5535 return Immediate; | 5315 return Immediate; |
| 5536 } | 5316 } |
| 5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { | 5317 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { |
| 5538 Ctx->statsUpdateRPImms(); | 5318 Ctx->statsUpdateRPImms(); |
| 5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5319 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); | 5375 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); |
| 5596 _mov(Reg, MemOperand); | 5376 _mov(Reg, MemOperand); |
| 5597 return Reg; | 5377 return Reg; |
| 5598 } | 5378 } |
| 5599 assert("Unsupported -randomize-pool-immediates option" && false); | 5379 assert("Unsupported -randomize-pool-immediates option" && false); |
| 5600 } | 5380 } |
| 5601 // the constant Immediate is not eligible for blinding/pooling | 5381 // the constant Immediate is not eligible for blinding/pooling |
| 5602 return Immediate; | 5382 return Immediate; |
| 5603 } | 5383 } |
| 5604 | 5384 |
| 5385 template <class Machine> | |
| 5605 OperandX8632Mem * | 5386 OperandX8632Mem * |
| 5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, | 5387 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, |
| 5607 int32_t RegNum) { | 5388 int32_t RegNum) { |
| 5608 assert(MemOperand); | 5389 assert(MemOperand); |
| 5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5390 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5610 RandomizationPoolingPaused == true) { | 5391 RandomizationPoolingPaused == true) { |
| 5611 // immediates randomization/pooling is turned off | 5392 // immediates randomization/pooling is turned off |
| 5612 return MemOperand; | 5393 return MemOperand; |
| 5613 } | 5394 } |
| 5614 | 5395 |
| 5615 // If this memory operand is already a randommized one, we do | 5396 // If this memory operand is already a randommized one, we do |
| 5616 // not randomize it again. | 5397 // not randomize it again. |
| 5617 if (MemOperand->getRandomized()) | 5398 if (MemOperand->getRandomized()) |
| 5618 return MemOperand; | 5399 return MemOperand; |
| 5619 | 5400 |
| 5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { | 5401 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { |
| 5621 if (C->shouldBeRandomizedOrPooled(Ctx)) { | 5402 if (C->shouldBeRandomizedOrPooled(Ctx)) { |
| 5622 // The offset of this mem operand should be blinded or pooled | 5403 // The offset of this mem operand should be blinded or pooled |
| 5623 Ctx->statsUpdateRPImms(); | 5404 Ctx->statsUpdateRPImms(); |
| 5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5405 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| 5625 RPI_Randomize) { | 5406 RPI_Randomize) { |
| 5626 // blind the constant offset | 5407 // blind the constant offset |
| 5627 // FROM: | 5408 // FROM: |
| 5628 // offset[base, index, shift] | 5409 // offset[base, index, shift] |
| 5629 // TO: | 5410 // TO: |
| 5630 // insert: lea offset+cookie[base], RegTemp | 5411 // insert: lea offset+cookie[base], RegTemp |
| 5631 // => -cookie[RegTemp, index, shift] | 5412 // => -cookie[RegTemp, index, shift] |
| 5632 uint32_t Value = | 5413 uint32_t Value = llvm::dyn_cast<ConstantInteger32>( |
| 5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) | 5414 MemOperand->getOffset())->getValue(); |
| 5634 ->getValue(); | |
| 5635 uint32_t Cookie = Ctx->getRandomizationCookie(); | 5415 uint32_t Cookie = Ctx->getRandomizationCookie(); |
| 5636 Constant *Mask1 = Ctx->getConstantInt( | 5416 Constant *Mask1 = Ctx->getConstantInt( |
| 5637 MemOperand->getOffset()->getType(), Cookie + Value); | 5417 MemOperand->getOffset()->getType(), Cookie + Value); |
| 5638 Constant *Mask2 = | 5418 Constant *Mask2 = |
| 5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 5419 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
| 5640 | 5420 |
| 5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( | 5421 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( |
| 5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); | 5422 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); |
| 5643 // If we have already assigned a physical register, we must come from | 5423 // If we have already assigned a physical register, we must come from |
| 5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5424 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5710 return NewMemOperand; | 5490 return NewMemOperand; |
| 5711 } | 5491 } |
| 5712 assert("Unsupported -randomize-pool-immediates option" && false); | 5492 assert("Unsupported -randomize-pool-immediates option" && false); |
| 5713 } | 5493 } |
| 5714 } | 5494 } |
| 5715 // the offset is not eligible for blinding or pooling, return the original | 5495 // the offset is not eligible for blinding or pooling, return the original |
| 5716 // mem operand | 5496 // mem operand |
| 5717 return MemOperand; | 5497 return MemOperand; |
| 5718 } | 5498 } |
| 5719 | 5499 |
| 5500 } // end of namespace X86Internal | |
| 5720 } // end of namespace Ice | 5501 } // end of namespace Ice |
| 5502 | |
| 5503 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | |
| 5504 | |
|
Jim Stichnoth
2015/06/22 23:04:05
git (or someone) warns about this "trailing whites
John
2015/06/22 23:09:55
Done.
I usually add the extra newline so that
ca
| |
| OLD | NEW |