| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 92 | 92 |
| 93 private: | 93 private: |
| 94 BoolFolding(const BoolFolding &) = delete; | 94 BoolFolding(const BoolFolding &) = delete; |
| 95 BoolFolding &operator=(const BoolFolding &) = delete; | 95 BoolFolding &operator=(const BoolFolding &) = delete; |
| 96 | 96 |
| 97 public: | 97 public: |
| 98 BoolFolding() = default; | 98 BoolFolding() = default; |
| 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); |
| 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); |
| 101 static bool hasComplexLowering(const Inst *Instr); | 101 static bool hasComplexLowering(const Inst *Instr); |
| 102 static bool isValidFolding(BoolFoldingProducerKind ProducerKind, |
| 103 BoolFoldingConsumerKind ConsumerKind); |
| 102 void init(CfgNode *Node); | 104 void init(CfgNode *Node); |
| 103 const Inst *getProducerFor(const Operand *Opnd) const; | 105 const Inst *getProducerFor(const Operand *Opnd) const; |
| 104 void dump(const Cfg *Func) const; | 106 void dump(const Cfg *Func) const; |
| 105 | 107 |
| 106 private: | 108 private: |
| 107 /// Returns true if Producers contains a valid entry for the given VarNum. | 109 /// Returns true if Producers contains a valid entry for the given VarNum. |
| 108 bool containsValid(SizeT VarNum) const { | 110 bool containsValid(SizeT VarNum) const { |
| 109 auto Element = Producers.find(VarNum); | 111 auto Element = Producers.find(VarNum); |
| 110 return Element != Producers.end() && Element->second.Instr != nullptr; | 112 return Element != Producers.end() && Element->second.Instr != nullptr; |
| 111 } | 113 } |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 186 return false; | 188 return false; |
| 187 case PK_Icmp64: | 189 case PK_Icmp64: |
| 188 return true; | 190 return true; |
| 189 case PK_Fcmp: | 191 case PK_Fcmp: |
| 190 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 191 .C2 != MachineTraits::Cond::Br_None; | 193 .C2 != MachineTraits::Cond::Br_None; |
| 192 } | 194 } |
| 193 } | 195 } |
| 194 | 196 |
| 195 template <class MachineTraits> | 197 template <class MachineTraits> |
| 198 bool BoolFolding<MachineTraits>::isValidFolding( |
| 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, |
| 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { |
| 201 switch (ProducerKind) { |
| 202 default: |
| 203 return false; |
| 204 case PK_Icmp32: |
| 205 case PK_Icmp64: |
| 206 case PK_Fcmp: |
| 207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select); |
| 208 case PK_Arith: |
| 209 return ConsumerKind == CK_Br; |
| 210 } |
| 211 } |
| 212 |
| 213 template <class MachineTraits> |
| 196 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | 214 void BoolFolding<MachineTraits>::init(CfgNode *Node) { |
| 197 Producers.clear(); | 215 Producers.clear(); |
| 198 for (Inst &Instr : Node->getInsts()) { | 216 for (Inst &Instr : Node->getInsts()) { |
| 199 // Check whether Instr is a valid producer. | 217 // Check whether Instr is a valid producer. |
| 200 Variable *Var = Instr.getDest(); | 218 Variable *Var = Instr.getDest(); |
| 201 if (!Instr.isDeleted() // only consider non-deleted instructions | 219 if (!Instr.isDeleted() // only consider non-deleted instructions |
| 202 && Var // only instructions with an actual dest var | 220 && Var // only instructions with an actual dest var |
| 203 && Var->getType() == IceType_i1 // only bool-type dest vars | 221 && Var->getType() == IceType_i1 // only bool-type dest vars |
| 204 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions |
| 205 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); | 223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); |
| 206 } | 224 } |
| 207 // Check each src variable against the map. | 225 // Check each src variable against the map. |
| 208 FOREACH_VAR_IN_INST(Var, Instr) { | 226 FOREACH_VAR_IN_INST(Var, Instr) { |
| 209 SizeT VarNum = Var->getIndex(); | 227 SizeT VarNum = Var->getIndex(); |
| 210 if (containsValid(VarNum)) { | 228 if (!containsValid(VarNum)) |
| 211 if (IndexOfVarOperandInInst(Var) != | 229 continue; |
| 212 0 // All valid consumers use Var as the first source operand | 230 // All valid consumers use Var as the first source operand |
| 213 || | 231 if (IndexOfVarOperandInInst(Var) != 0) { |
| 214 getConsumerKind(&Instr) == CK_None // must be white-listed | 232 setInvalid(VarNum); |
| 215 || | 233 continue; |
| 216 (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch | 234 } |
| 217 getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) || | 235 // Consumer instructions must be white-listed |
| 218 (Producers[VarNum].IsComplex && // complex can't be multi-use | 236 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind |
| 219 Producers[VarNum].NumUses > 0)) { | 237 ConsumerKind = getConsumerKind(&Instr); |
| 220 setInvalid(VarNum); | 238 if (ConsumerKind == CK_None) { |
| 221 continue; | 239 setInvalid(VarNum); |
| 222 } | 240 continue; |
| 223 ++Producers[VarNum].NumUses; | 241 } |
| 224 if (Instr.isLastUse(Var)) { | 242 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 225 Producers[VarNum].IsLiveOut = false; | 243 ProducerKind = getProducerKind(Producers[VarNum].Instr); |
| 226 } | 244 if (!isValidFolding(ProducerKind, ConsumerKind)) { |
| 245 setInvalid(VarNum); |
| 246 continue; |
| 247 } |
| 248 // Avoid creating multiple copies of complex producer instructions. |
| 249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) { |
| 250 setInvalid(VarNum); |
| 251 continue; |
| 252 } |
| 253 ++Producers[VarNum].NumUses; |
| 254 if (Instr.isLastUse(Var)) { |
| 255 Producers[VarNum].IsLiveOut = false; |
| 227 } | 256 } |
| 228 } | 257 } |
| 229 } | 258 } |
| 230 for (auto &I : Producers) { | 259 for (auto &I : Producers) { |
| 231 // Ignore entries previously marked invalid. | 260 // Ignore entries previously marked invalid. |
| 232 if (I.second.Instr == nullptr) | 261 if (I.second.Instr == nullptr) |
| 233 continue; | 262 continue; |
| 234 // Disable the producer if its dest may be live beyond this block. | 263 // Disable the producer if its dest may be live beyond this block. |
| 235 if (I.second.IsLiveOut) { | 264 if (I.second.IsLiveOut) { |
| 236 setInvalid(I.first); | 265 setInvalid(I.first); |
| (...skipping 1027 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1264 // test t1, 0x20 | 1293 // test t1, 0x20 |
| 1265 // je L1 | 1294 // je L1 |
| 1266 // use(t3) | 1295 // use(t3) |
| 1267 // t3 = t2 | 1296 // t3 = t2 |
| 1268 // t2 = 0 | 1297 // t2 = 0 |
| 1269 _shld(T_3, T_2, T_1); | 1298 _shld(T_3, T_2, T_1); |
| 1270 _shl(T_2, T_1); | 1299 _shl(T_2, T_1); |
| 1271 _test(T_1, BitTest); | 1300 _test(T_1, BitTest); |
| 1272 _br(Traits::Cond::Br_e, Label); | 1301 _br(Traits::Cond::Br_e, Label); |
| 1273 // T_2 and T_3 are being assigned again because of the intra-block control | 1302 // T_2 and T_3 are being assigned again because of the intra-block control |
| 1274 // flow, so we need the _mov_redefined variant to avoid liveness problems. | 1303 // flow, so we need to use _redefined to avoid liveness problems. |
| 1275 _mov_redefined(T_3, T_2); | 1304 _redefined(_mov(T_3, T_2)); |
| 1276 _mov_redefined(T_2, Zero); | 1305 _redefined(_mov(T_2, Zero)); |
| 1277 } break; | 1306 } break; |
| 1278 case InstArithmetic::Lshr: { | 1307 case InstArithmetic::Lshr: { |
| 1279 // a=b>>c (unsigned) ==> | 1308 // a=b>>c (unsigned) ==> |
| 1280 // t2 = shrd t2, t3, t1 | 1309 // t2 = shrd t2, t3, t1 |
| 1281 // t3 = shr t3, t1 | 1310 // t3 = shr t3, t1 |
| 1282 // test t1, 0x20 | 1311 // test t1, 0x20 |
| 1283 // je L1 | 1312 // je L1 |
| 1284 // use(t2) | 1313 // use(t2) |
| 1285 // t2 = t3 | 1314 // t2 = t3 |
| 1286 // t3 = 0 | 1315 // t3 = 0 |
| 1287 _shrd(T_2, T_3, T_1); | 1316 _shrd(T_2, T_3, T_1); |
| 1288 _shr(T_3, T_1); | 1317 _shr(T_3, T_1); |
| 1289 _test(T_1, BitTest); | 1318 _test(T_1, BitTest); |
| 1290 _br(Traits::Cond::Br_e, Label); | 1319 _br(Traits::Cond::Br_e, Label); |
| 1291 // T_2 and T_3 are being assigned again because of the intra-block control | 1320 // T_2 and T_3 are being assigned again because of the intra-block control |
| 1292 // flow, so we need the _mov_redefined variant to avoid liveness problems. | 1321 // flow, so we need to use _redefined to avoid liveness problems. |
| 1293 _mov_redefined(T_2, T_3); | 1322 _redefined(_mov(T_2, T_3)); |
| 1294 _mov_redefined(T_3, Zero); | 1323 _redefined(_mov(T_3, Zero)); |
| 1295 } break; | 1324 } break; |
| 1296 case InstArithmetic::Ashr: { | 1325 case InstArithmetic::Ashr: { |
| 1297 // a=b>>c (signed) ==> | 1326 // a=b>>c (signed) ==> |
| 1298 // t2 = shrd t2, t3, t1 | 1327 // t2 = shrd t2, t3, t1 |
| 1299 // t3 = sar t3, t1 | 1328 // t3 = sar t3, t1 |
| 1300 // test t1, 0x20 | 1329 // test t1, 0x20 |
| 1301 // je L1 | 1330 // je L1 |
| 1302 // use(t2) | 1331 // use(t2) |
| 1303 // t2 = t3 | 1332 // t2 = t3 |
| 1304 // t3 = sar t3, 0x1f | 1333 // t3 = sar t3, 0x1f |
| 1305 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1334 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| 1306 _shrd(T_2, T_3, T_1); | 1335 _shrd(T_2, T_3, T_1); |
| 1307 _sar(T_3, T_1); | 1336 _sar(T_3, T_1); |
| 1308 _test(T_1, BitTest); | 1337 _test(T_1, BitTest); |
| 1309 _br(Traits::Cond::Br_e, Label); | 1338 _br(Traits::Cond::Br_e, Label); |
| 1310 // T_2 and T_3 are being assigned again because of the intra-block control | 1339 // T_2 and T_3 are being assigned again because of the intra-block control |
| 1311 // flow, so T_2 needs the _mov_redefined variant to avoid liveness | 1340 // flow, so T_2 needs to use _redefined to avoid liveness problems. T_3 |
| 1312 // problems. T_3 doesn't need special treatment because it is reassigned | 1341 // doesn't need special treatment because it is reassigned via _sar |
| 1313 // via _sar instead of _mov. | 1342 // instead of _mov. |
| 1314 _mov_redefined(T_2, T_3); | 1343 _redefined(_mov(T_2, T_3)); |
| 1315 _sar(T_3, SignExtend); | 1344 _sar(T_3, SignExtend); |
| 1316 } break; | 1345 } break; |
| 1317 } | 1346 } |
| 1318 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> | 1347 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> |
| 1319 // L1: | 1348 // L1: |
| 1320 // a.lo = t2 | 1349 // a.lo = t2 |
| 1321 // a.hi = t3 | 1350 // a.hi = t3 |
| 1322 Context.insert(Label); | 1351 Context.insert(Label); |
| 1323 _mov(DestLo, T_2); | 1352 _mov(DestLo, T_2); |
| 1324 _mov(DestHi, T_3); | 1353 _mov(DestHi, T_3); |
| (...skipping 553 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1878 } | 1907 } |
| 1879 } | 1908 } |
| 1880 | 1909 |
| 1881 template <class Machine> | 1910 template <class Machine> |
| 1882 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| 1883 Variable *Dest = Inst->getDest(); | 1912 Variable *Dest = Inst->getDest(); |
| 1884 if (Dest->isRematerializable()) { | 1913 if (Dest->isRematerializable()) { |
| 1885 Context.insert(InstFakeDef::create(Func, Dest)); | 1914 Context.insert(InstFakeDef::create(Func, Dest)); |
| 1886 return; | 1915 return; |
| 1887 } | 1916 } |
| 1888 Operand *Src0 = Inst->getSrc(0); | 1917 Operand *Src = Inst->getSrc(0); |
| 1889 assert(Dest->getType() == Src0->getType()); | 1918 assert(Dest->getType() == Src->getType()); |
| 1890 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1919 lowerMove(Dest, Src, false); |
| 1891 Src0 = legalize(Src0); | |
| 1892 Operand *Src0Lo = loOperand(Src0); | |
| 1893 Operand *Src0Hi = hiOperand(Src0); | |
| 1894 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1895 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1896 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 1897 _mov(T_Lo, Src0Lo); | |
| 1898 _mov(DestLo, T_Lo); | |
| 1899 _mov(T_Hi, Src0Hi); | |
| 1900 _mov(DestHi, T_Hi); | |
| 1901 } else { | |
| 1902 Operand *Src0Legal; | |
| 1903 if (Dest->hasReg()) { | |
| 1904 // If Dest already has a physical register, then only basic legalization | |
| 1905 // is needed, as the source operand can be a register, immediate, or | |
| 1906 // memory. | |
| 1907 Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum()); | |
| 1908 } else { | |
| 1909 // If Dest could be a stack operand, then RI must be a physical register | |
| 1910 // or a scalar integer immediate. | |
| 1911 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); | |
| 1912 } | |
| 1913 if (isVectorType(Dest->getType())) | |
| 1914 _movp(Dest, Src0Legal); | |
| 1915 else | |
| 1916 _mov(Dest, Src0Legal); | |
| 1917 } | |
| 1918 } | 1920 } |
| 1919 | 1921 |
| 1920 template <class Machine> | 1922 template <class Machine> |
| 1921 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { | 1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) { |
| 1922 if (Inst->isUnconditional()) { | 1924 if (Br->isUnconditional()) { |
| 1923 _br(Inst->getTargetUnconditional()); | 1925 _br(Br->getTargetUnconditional()); |
| 1924 return; | 1926 return; |
| 1925 } | 1927 } |
| 1926 Operand *Cond = Inst->getCondition(); | 1928 Operand *Cond = Br->getCondition(); |
| 1927 | 1929 |
| 1928 // Handle folding opportunities. | 1930 // Handle folding opportunities. |
| 1929 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 1931 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
| 1930 assert(Producer->isDeleted()); | 1932 assert(Producer->isDeleted()); |
| 1931 switch (BoolFolding::getProducerKind(Producer)) { | 1933 switch (BoolFolding::getProducerKind(Producer)) { |
| 1932 default: | 1934 default: |
| 1933 break; | 1935 break; |
| 1934 case BoolFolding::PK_Icmp32: | 1936 case BoolFolding::PK_Icmp32: |
| 1935 case BoolFolding::PK_Icmp64: { | 1937 case BoolFolding::PK_Icmp64: { |
| 1936 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); | 1938 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Br); |
| 1937 return; | 1939 return; |
| 1938 } | 1940 } |
| 1939 case BoolFolding::PK_Fcmp: { | 1941 case BoolFolding::PK_Fcmp: { |
| 1940 lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst); | 1942 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Br); |
| 1941 return; | 1943 return; |
| 1942 } | 1944 } |
| 1943 case BoolFolding::PK_Arith: { | 1945 case BoolFolding::PK_Arith: { |
| 1944 lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst); | 1946 lowerArithAndConsumer(llvm::dyn_cast<InstArithmetic>(Producer), Br); |
| 1945 return; | 1947 return; |
| 1946 } | 1948 } |
| 1947 } | 1949 } |
| 1948 } | 1950 } |
| 1949 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 1951 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 1950 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1952 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1951 _cmp(Src0, Zero); | 1953 _cmp(Src0, Zero); |
| 1952 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1954 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 1953 } | 1955 } |
| 1954 | 1956 |
| 1955 template <class Machine> | 1957 template <class Machine> |
| 1956 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 1958 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| 1957 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1959 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 1958 InstCast::OpKind CastKind = Inst->getCastKind(); | 1960 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 1959 Variable *Dest = Inst->getDest(); | 1961 Variable *Dest = Inst->getDest(); |
| 1960 Type DestTy = Dest->getType(); | 1962 Type DestTy = Dest->getType(); |
| 1961 switch (CastKind) { | 1963 switch (CastKind) { |
| 1962 default: | 1964 default: |
| (...skipping 513 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2476 lowerCast(Cast); | 2478 lowerCast(Cast); |
| 2477 ExtractedElementR = T; | 2479 ExtractedElementR = T; |
| 2478 } | 2480 } |
| 2479 | 2481 |
| 2480 // Copy the element to the destination. | 2482 // Copy the element to the destination. |
| 2481 Variable *Dest = Inst->getDest(); | 2483 Variable *Dest = Inst->getDest(); |
| 2482 _mov(Dest, ExtractedElementR); | 2484 _mov(Dest, ExtractedElementR); |
| 2483 } | 2485 } |
| 2484 | 2486 |
| 2485 template <class Machine> | 2487 template <class Machine> |
| 2486 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { | 2488 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) { |
| 2487 constexpr InstBr *Br = nullptr; | 2489 Variable *Dest = Fcmp->getDest(); |
| 2488 lowerFcmpAndBr(Inst, Br); | 2490 |
| 2491 if (isVectorType(Dest->getType())) { |
| 2492 lowerFcmpVector(Fcmp); |
| 2493 } else { |
| 2494 constexpr Inst *Consumer = nullptr; |
| 2495 lowerFcmpAndConsumer(Fcmp, Consumer); |
| 2496 } |
| 2489 } | 2497 } |
| 2490 | 2498 |
| 2491 template <class Machine> | 2499 template <class Machine> |
| 2492 void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, | 2500 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp, |
| 2493 const InstBr *Br) { | 2501 const Inst *Consumer) { |
| 2494 Operand *Src0 = Inst->getSrc(0); | 2502 Operand *Src0 = Fcmp->getSrc(0); |
| 2495 Operand *Src1 = Inst->getSrc(1); | 2503 Operand *Src1 = Fcmp->getSrc(1); |
| 2496 Variable *Dest = Inst->getDest(); | 2504 Variable *Dest = Fcmp->getDest(); |
| 2497 | 2505 |
| 2498 if (isVectorType(Dest->getType())) { | 2506 if (isVectorType(Dest->getType())) |
| 2499 if (Br) | 2507 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| 2500 llvm::report_fatal_error("vector compare/branch cannot be folded"); | |
| 2501 InstFcmp::FCond Condition = Inst->getCondition(); | |
| 2502 size_t Index = static_cast<size_t>(Condition); | |
| 2503 assert(Index < Traits::TableFcmpSize); | |
| 2504 | 2508 |
| 2505 if (Traits::TableFcmp[Index].SwapVectorOperands) | 2509 if (Consumer != nullptr) { |
| 2506 std::swap(Src0, Src1); | 2510 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2507 | 2511 if (lowerOptimizeFcmpSelect(Fcmp, Select)) |
| 2508 Variable *T = nullptr; | 2512 return; |
| 2509 | |
| 2510 if (Condition == InstFcmp::True) { | |
| 2511 // makeVectorOfOnes() requires an integer vector type. | |
| 2512 T = makeVectorOfMinusOnes(IceType_v4i32); | |
| 2513 } else if (Condition == InstFcmp::False) { | |
| 2514 T = makeVectorOfZeros(Dest->getType()); | |
| 2515 } else { | |
| 2516 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2517 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2518 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | |
| 2519 Src1RM = legalizeToReg(Src1RM); | |
| 2520 | |
| 2521 switch (Condition) { | |
| 2522 default: { | |
| 2523 typename Traits::Cond::CmppsCond Predicate = | |
| 2524 Traits::TableFcmp[Index].Predicate; | |
| 2525 assert(Predicate != Traits::Cond::Cmpps_Invalid); | |
| 2526 T = makeReg(Src0RM->getType()); | |
| 2527 _movp(T, Src0RM); | |
| 2528 _cmpps(T, Src1RM, Predicate); | |
| 2529 } break; | |
| 2530 case InstFcmp::One: { | |
| 2531 // Check both unequal and ordered. | |
| 2532 T = makeReg(Src0RM->getType()); | |
| 2533 Variable *T2 = makeReg(Src0RM->getType()); | |
| 2534 _movp(T, Src0RM); | |
| 2535 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq); | |
| 2536 _movp(T2, Src0RM); | |
| 2537 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord); | |
| 2538 _pand(T, T2); | |
| 2539 } break; | |
| 2540 case InstFcmp::Ueq: { | |
| 2541 // Check both equal or unordered. | |
| 2542 T = makeReg(Src0RM->getType()); | |
| 2543 Variable *T2 = makeReg(Src0RM->getType()); | |
| 2544 _movp(T, Src0RM); | |
| 2545 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq); | |
| 2546 _movp(T2, Src0RM); | |
| 2547 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord); | |
| 2548 _por(T, T2); | |
| 2549 } break; | |
| 2550 } | |
| 2551 } | 2513 } |
| 2552 | |
| 2553 _movp(Dest, T); | |
| 2554 eliminateNextVectorSextInstruction(Dest); | |
| 2555 return; | |
| 2556 } | 2514 } |
| 2557 | 2515 |
| 2558 // Lowering a = fcmp cond, b, c | 2516 // Lowering a = fcmp cond, b, c |
| 2559 // ucomiss b, c /* only if C1 != Br_None */ | 2517 // ucomiss b, c /* only if C1 != Br_None */ |
| 2560 // /* but swap b,c order if SwapOperands==true */ | 2518 // /* but swap b,c order if SwapOperands==true */ |
| 2561 // mov a, <default> | 2519 // mov a, <default> |
| 2562 // j<C1> label /* only if C1 != Br_None */ | 2520 // j<C1> label /* only if C1 != Br_None */ |
| 2563 // j<C2> label /* only if C2 != Br_None */ | 2521 // j<C2> label /* only if C2 != Br_None */ |
| 2564 // FakeUse(a) /* only if C1 != Br_None */ | 2522 // FakeUse(a) /* only if C1 != Br_None */ |
| 2565 // mov a, !<default> /* only if C1 != Br_None */ | 2523 // mov a, !<default> /* only if C1 != Br_None */ |
| 2566 // label: /* only if C1 != Br_None */ | 2524 // label: /* only if C1 != Br_None */ |
| 2567 // | 2525 // |
| 2568 // setcc lowering when C1 != Br_None && C2 == Br_None: | 2526 // setcc lowering when C1 != Br_None && C2 == Br_None: |
| 2569 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ | 2527 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ |
| 2570 // setcc a, C1 | 2528 // setcc a, C1 |
| 2571 InstFcmp::FCond Condition = Inst->getCondition(); | 2529 InstFcmp::FCond Condition = Fcmp->getCondition(); |
| 2572 size_t Index = static_cast<size_t>(Condition); | 2530 size_t Index = static_cast<size_t>(Condition); |
| 2573 assert(Index < Traits::TableFcmpSize); | 2531 assert(Index < Traits::TableFcmpSize); |
| 2574 if (Traits::TableFcmp[Index].SwapScalarOperands) | 2532 if (Traits::TableFcmp[Index].SwapScalarOperands) |
| 2575 std::swap(Src0, Src1); | 2533 std::swap(Src0, Src1); |
| 2576 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); | 2534 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); |
| 2577 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); | 2535 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); |
| 2578 if (HasC1) { | 2536 if (HasC1) { |
| 2579 Src0 = legalize(Src0); | 2537 Src0 = legalize(Src0); |
| 2580 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2538 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2581 Variable *T = nullptr; | 2539 Variable *T = nullptr; |
| 2582 _mov(T, Src0); | 2540 _mov(T, Src0); |
| 2583 _ucomiss(T, Src1RM); | 2541 _ucomiss(T, Src1RM); |
| 2584 if (!HasC2) { | 2542 if (!HasC2) { |
| 2585 assert(Traits::TableFcmp[Index].Default); | 2543 assert(Traits::TableFcmp[Index].Default); |
| 2586 setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br); | 2544 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer); |
| 2587 return; | 2545 return; |
| 2588 } | 2546 } |
| 2589 } | 2547 } |
| 2590 int32_t IntDefault = Traits::TableFcmp[Index].Default; | 2548 int32_t IntDefault = Traits::TableFcmp[Index].Default; |
| 2591 if (Br == nullptr) { | 2549 if (Consumer == nullptr) { |
| 2592 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); | 2550 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); |
| 2593 _mov(Dest, Default); | 2551 _mov(Dest, Default); |
| 2594 if (HasC1) { | 2552 if (HasC1) { |
| 2595 typename Traits::Insts::Label *Label = | 2553 typename Traits::Insts::Label *Label = |
| 2596 Traits::Insts::Label::create(Func, this); | 2554 Traits::Insts::Label::create(Func, this); |
| 2597 _br(Traits::TableFcmp[Index].C1, Label); | 2555 _br(Traits::TableFcmp[Index].C1, Label); |
| 2598 if (HasC2) { | 2556 if (HasC2) { |
| 2599 _br(Traits::TableFcmp[Index].C2, Label); | 2557 _br(Traits::TableFcmp[Index].C2, Label); |
| 2600 } | 2558 } |
| 2601 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); | 2559 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); |
| 2602 _mov_redefined(Dest, NonDefault); | 2560 _redefined(_mov(Dest, NonDefault)); |
| 2603 Context.insert(Label); | 2561 Context.insert(Label); |
| 2604 } | 2562 } |
| 2605 } else { | 2563 return; |
| 2564 } |
| 2565 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 2606 CfgNode *TrueSucc = Br->getTargetTrue(); | 2566 CfgNode *TrueSucc = Br->getTargetTrue(); |
| 2607 CfgNode *FalseSucc = Br->getTargetFalse(); | 2567 CfgNode *FalseSucc = Br->getTargetFalse(); |
| 2608 if (IntDefault != 0) | 2568 if (IntDefault != 0) |
| 2609 std::swap(TrueSucc, FalseSucc); | 2569 std::swap(TrueSucc, FalseSucc); |
| 2610 if (HasC1) { | 2570 if (HasC1) { |
| 2611 _br(Traits::TableFcmp[Index].C1, FalseSucc); | 2571 _br(Traits::TableFcmp[Index].C1, FalseSucc); |
| 2612 if (HasC2) { | 2572 if (HasC2) { |
| 2613 _br(Traits::TableFcmp[Index].C2, FalseSucc); | 2573 _br(Traits::TableFcmp[Index].C2, FalseSucc); |
| 2614 } | 2574 } |
| 2615 _br(TrueSucc); | 2575 _br(TrueSucc); |
| 2616 return; | 2576 return; |
| 2617 } | 2577 } |
| 2618 _br(FalseSucc); | 2578 _br(FalseSucc); |
| 2579 return; |
| 2619 } | 2580 } |
| 2581 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2582 Operand *SrcT = Select->getTrueOperand(); |
| 2583 Operand *SrcF = Select->getFalseOperand(); |
| 2584 Variable *SelectDest = Select->getDest(); |
| 2585 if (IntDefault != 0) |
| 2586 std::swap(SrcT, SrcF); |
| 2587 lowerMove(SelectDest, SrcF, false); |
| 2588 if (HasC1) { |
| 2589 typename Traits::Insts::Label *Label = |
| 2590 Traits::Insts::Label::create(Func, this); |
| 2591 _br(Traits::TableFcmp[Index].C1, Label); |
| 2592 if (HasC2) { |
| 2593 _br(Traits::TableFcmp[Index].C2, Label); |
| 2594 } |
| 2595 static constexpr bool IsRedefinition = true; |
| 2596 lowerMove(SelectDest, SrcT, IsRedefinition); |
| 2597 Context.insert(Label); |
| 2598 } |
| 2599 return; |
| 2600 } |
| 2601 llvm::report_fatal_error("Unexpected consumer type"); |
| 2602 } |
| 2603 |
| 2604 template <class Machine> |
| 2605 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) { |
| 2606 Operand *Src0 = Fcmp->getSrc(0); |
| 2607 Operand *Src1 = Fcmp->getSrc(1); |
| 2608 Variable *Dest = Fcmp->getDest(); |
| 2609 |
| 2610 if (!isVectorType(Dest->getType())) |
| 2611 llvm::report_fatal_error("Expected vector compare"); |
| 2612 |
| 2613 InstFcmp::FCond Condition = Fcmp->getCondition(); |
| 2614 size_t Index = static_cast<size_t>(Condition); |
| 2615 assert(Index < Traits::TableFcmpSize); |
| 2616 |
| 2617 if (Traits::TableFcmp[Index].SwapVectorOperands) |
| 2618 std::swap(Src0, Src1); |
| 2619 |
| 2620 Variable *T = nullptr; |
| 2621 |
| 2622 if (Condition == InstFcmp::True) { |
| 2623 // makeVectorOfOnes() requires an integer vector type. |
| 2624 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 2625 } else if (Condition == InstFcmp::False) { |
| 2626 T = makeVectorOfZeros(Dest->getType()); |
| 2627 } else { |
| 2628 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2629 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2630 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| 2631 Src1RM = legalizeToReg(Src1RM); |
| 2632 |
| 2633 switch (Condition) { |
| 2634 default: { |
| 2635 typename Traits::Cond::CmppsCond Predicate = |
| 2636 Traits::TableFcmp[Index].Predicate; |
| 2637 assert(Predicate != Traits::Cond::Cmpps_Invalid); |
| 2638 T = makeReg(Src0RM->getType()); |
| 2639 _movp(T, Src0RM); |
| 2640 _cmpps(T, Src1RM, Predicate); |
| 2641 } break; |
| 2642 case InstFcmp::One: { |
| 2643 // Check both unequal and ordered. |
| 2644 T = makeReg(Src0RM->getType()); |
| 2645 Variable *T2 = makeReg(Src0RM->getType()); |
| 2646 _movp(T, Src0RM); |
| 2647 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq); |
| 2648 _movp(T2, Src0RM); |
| 2649 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord); |
| 2650 _pand(T, T2); |
| 2651 } break; |
| 2652 case InstFcmp::Ueq: { |
| 2653 // Check both equal or unordered. |
| 2654 T = makeReg(Src0RM->getType()); |
| 2655 Variable *T2 = makeReg(Src0RM->getType()); |
| 2656 _movp(T, Src0RM); |
| 2657 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq); |
| 2658 _movp(T2, Src0RM); |
| 2659 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord); |
| 2660 _por(T, T2); |
| 2661 } break; |
| 2662 } |
| 2663 } |
| 2664 |
| 2665 assert(T != nullptr); |
| 2666 _movp(Dest, T); |
| 2667 eliminateNextVectorSextInstruction(Dest); |
| 2620 } | 2668 } |
| 2621 | 2669 |
| 2622 inline bool isZero(const Operand *Opnd) { | 2670 inline bool isZero(const Operand *Opnd) { |
| 2623 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) | 2671 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
| 2624 return C64->getValue() == 0; | 2672 return C64->getValue() == 0; |
| 2625 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 2673 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 2626 return C32->getValue() == 0; | 2674 return C32->getValue() == 0; |
| 2627 return false; | 2675 return false; |
| 2628 } | 2676 } |
| 2629 | 2677 |
| 2630 template <class Machine> | 2678 template <class Machine> |
| 2631 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { | 2679 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp, |
| 2632 constexpr InstBr *Br = nullptr; | 2680 const Inst *Consumer) { |
| 2633 lowerIcmpAndBr(Inst, Br); | |
| 2634 } | |
| 2635 | |
| 2636 template <class Machine> | |
| 2637 void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp, | |
| 2638 const InstBr *Br) { | |
| 2639 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2681 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2640 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2682 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2641 Variable *Dest = Icmp->getDest(); | 2683 Variable *Dest = Icmp->getDest(); |
| 2642 | 2684 |
| 2643 if (isVectorType(Dest->getType())) { | 2685 if (isVectorType(Dest->getType())) |
| 2644 if (Br) | 2686 llvm::report_fatal_error("Vector compare/branch cannot be folded"); |
| 2645 llvm::report_fatal_error("vector compare/branch cannot be folded"); | |
| 2646 Type Ty = Src0->getType(); | |
| 2647 // Promote i1 vectors to 128 bit integer vector types. | |
| 2648 if (typeElementType(Ty) == IceType_i1) { | |
| 2649 Type NewTy = IceType_NUM; | |
| 2650 switch (Ty) { | |
| 2651 default: | |
| 2652 llvm_unreachable("unexpected type"); | |
| 2653 break; | |
| 2654 case IceType_v4i1: | |
| 2655 NewTy = IceType_v4i32; | |
| 2656 break; | |
| 2657 case IceType_v8i1: | |
| 2658 NewTy = IceType_v8i16; | |
| 2659 break; | |
| 2660 case IceType_v16i1: | |
| 2661 NewTy = IceType_v16i8; | |
| 2662 break; | |
| 2663 } | |
| 2664 Variable *NewSrc0 = Func->makeVariable(NewTy); | |
| 2665 Variable *NewSrc1 = Func->makeVariable(NewTy); | |
| 2666 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); | |
| 2667 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); | |
| 2668 Src0 = NewSrc0; | |
| 2669 Src1 = NewSrc1; | |
| 2670 Ty = NewTy; | |
| 2671 } | |
| 2672 | 2687 |
| 2673 InstIcmp::ICond Condition = Icmp->getCondition(); | 2688 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 2674 | 2689 lowerIcmp64(Icmp, Consumer); |
| 2675 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2676 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2677 | |
| 2678 // SSE2 only has signed comparison operations. Transform unsigned inputs in | |
| 2679 // a manner that allows for the use of signed comparison operations by | |
| 2680 // flipping the high order bits. | |
| 2681 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || | |
| 2682 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { | |
| 2683 Variable *T0 = makeReg(Ty); | |
| 2684 Variable *T1 = makeReg(Ty); | |
| 2685 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | |
| 2686 _movp(T0, Src0RM); | |
| 2687 _pxor(T0, HighOrderBits); | |
| 2688 _movp(T1, Src1RM); | |
| 2689 _pxor(T1, HighOrderBits); | |
| 2690 Src0RM = T0; | |
| 2691 Src1RM = T1; | |
| 2692 } | |
| 2693 | |
| 2694 Variable *T = makeReg(Ty); | |
| 2695 switch (Condition) { | |
| 2696 default: | |
| 2697 llvm_unreachable("unexpected condition"); | |
| 2698 break; | |
| 2699 case InstIcmp::Eq: { | |
| 2700 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | |
| 2701 Src1RM = legalizeToReg(Src1RM); | |
| 2702 _movp(T, Src0RM); | |
| 2703 _pcmpeq(T, Src1RM); | |
| 2704 } break; | |
| 2705 case InstIcmp::Ne: { | |
| 2706 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | |
| 2707 Src1RM = legalizeToReg(Src1RM); | |
| 2708 _movp(T, Src0RM); | |
| 2709 _pcmpeq(T, Src1RM); | |
| 2710 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2711 _pxor(T, MinusOne); | |
| 2712 } break; | |
| 2713 case InstIcmp::Ugt: | |
| 2714 case InstIcmp::Sgt: { | |
| 2715 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | |
| 2716 Src1RM = legalizeToReg(Src1RM); | |
| 2717 _movp(T, Src0RM); | |
| 2718 _pcmpgt(T, Src1RM); | |
| 2719 } break; | |
| 2720 case InstIcmp::Uge: | |
| 2721 case InstIcmp::Sge: { | |
| 2722 // !(Src1RM > Src0RM) | |
| 2723 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | |
| 2724 Src0RM = legalizeToReg(Src0RM); | |
| 2725 _movp(T, Src1RM); | |
| 2726 _pcmpgt(T, Src0RM); | |
| 2727 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2728 _pxor(T, MinusOne); | |
| 2729 } break; | |
| 2730 case InstIcmp::Ult: | |
| 2731 case InstIcmp::Slt: { | |
| 2732 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | |
| 2733 Src0RM = legalizeToReg(Src0RM); | |
| 2734 _movp(T, Src1RM); | |
| 2735 _pcmpgt(T, Src0RM); | |
| 2736 } break; | |
| 2737 case InstIcmp::Ule: | |
| 2738 case InstIcmp::Sle: { | |
| 2739 // !(Src0RM > Src1RM) | |
| 2740 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) | |
| 2741 Src1RM = legalizeToReg(Src1RM); | |
| 2742 _movp(T, Src0RM); | |
| 2743 _pcmpgt(T, Src1RM); | |
| 2744 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 2745 _pxor(T, MinusOne); | |
| 2746 } break; | |
| 2747 } | |
| 2748 | |
| 2749 _movp(Dest, T); | |
| 2750 eliminateNextVectorSextInstruction(Dest); | |
| 2751 return; | 2690 return; |
| 2752 } | 2691 } |
| 2753 | 2692 |
| 2754 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { | |
| 2755 lowerIcmp64(Icmp, Br); | |
| 2756 return; | |
| 2757 } | |
| 2758 | |
| 2759 // cmp b, c | 2693 // cmp b, c |
| 2760 if (isZero(Src1)) { | 2694 if (isZero(Src1)) { |
| 2761 switch (Icmp->getCondition()) { | 2695 switch (Icmp->getCondition()) { |
| 2762 default: | 2696 default: |
| 2763 break; | 2697 break; |
| 2764 case InstIcmp::Uge: | 2698 case InstIcmp::Uge: |
| 2765 movOrBr(true, Dest, Br); | 2699 movOrConsumer(true, Dest, Consumer); |
| 2766 return; | 2700 return; |
| 2767 case InstIcmp::Ult: | 2701 case InstIcmp::Ult: |
| 2768 movOrBr(false, Dest, Br); | 2702 movOrConsumer(false, Dest, Consumer); |
| 2769 return; | 2703 return; |
| 2770 } | 2704 } |
| 2771 } | 2705 } |
| 2772 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2706 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2773 _cmp(Src0RM, Src1); | 2707 _cmp(Src0RM, Src1); |
| 2774 setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br); | 2708 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, |
| 2709 Consumer); |
| 2710 } |
| 2711 |
| 2712 template <class Machine> |
| 2713 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) { |
| 2714 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2715 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2716 Variable *Dest = Icmp->getDest(); |
| 2717 |
| 2718 if (!isVectorType(Dest->getType())) |
| 2719 llvm::report_fatal_error("Expected a vector compare"); |
| 2720 |
| 2721 Type Ty = Src0->getType(); |
| 2722 // Promote i1 vectors to 128 bit integer vector types. |
| 2723 if (typeElementType(Ty) == IceType_i1) { |
| 2724 Type NewTy = IceType_NUM; |
| 2725 switch (Ty) { |
| 2726 default: |
| 2727 llvm::report_fatal_error("unexpected type"); |
| 2728 break; |
| 2729 case IceType_v4i1: |
| 2730 NewTy = IceType_v4i32; |
| 2731 break; |
| 2732 case IceType_v8i1: |
| 2733 NewTy = IceType_v8i16; |
| 2734 break; |
| 2735 case IceType_v16i1: |
| 2736 NewTy = IceType_v16i8; |
| 2737 break; |
| 2738 } |
| 2739 Variable *NewSrc0 = Func->makeVariable(NewTy); |
| 2740 Variable *NewSrc1 = Func->makeVariable(NewTy); |
| 2741 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); |
| 2742 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); |
| 2743 Src0 = NewSrc0; |
| 2744 Src1 = NewSrc1; |
| 2745 Ty = NewTy; |
| 2746 } |
| 2747 |
| 2748 InstIcmp::ICond Condition = Icmp->getCondition(); |
| 2749 |
| 2750 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2751 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2752 |
| 2753 // SSE2 only has signed comparison operations. Transform unsigned inputs in |
| 2754 // a manner that allows for the use of signed comparison operations by |
| 2755 // flipping the high order bits. |
| 2756 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
| 2757 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
| 2758 Variable *T0 = makeReg(Ty); |
| 2759 Variable *T1 = makeReg(Ty); |
| 2760 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| 2761 _movp(T0, Src0RM); |
| 2762 _pxor(T0, HighOrderBits); |
| 2763 _movp(T1, Src1RM); |
| 2764 _pxor(T1, HighOrderBits); |
| 2765 Src0RM = T0; |
| 2766 Src1RM = T1; |
| 2767 } |
| 2768 |
| 2769 Variable *T = makeReg(Ty); |
| 2770 switch (Condition) { |
| 2771 default: |
| 2772 llvm_unreachable("unexpected condition"); |
| 2773 break; |
| 2774 case InstIcmp::Eq: { |
| 2775 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| 2776 Src1RM = legalizeToReg(Src1RM); |
| 2777 _movp(T, Src0RM); |
| 2778 _pcmpeq(T, Src1RM); |
| 2779 } break; |
| 2780 case InstIcmp::Ne: { |
| 2781 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| 2782 Src1RM = legalizeToReg(Src1RM); |
| 2783 _movp(T, Src0RM); |
| 2784 _pcmpeq(T, Src1RM); |
| 2785 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2786 _pxor(T, MinusOne); |
| 2787 } break; |
| 2788 case InstIcmp::Ugt: |
| 2789 case InstIcmp::Sgt: { |
| 2790 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| 2791 Src1RM = legalizeToReg(Src1RM); |
| 2792 _movp(T, Src0RM); |
| 2793 _pcmpgt(T, Src1RM); |
| 2794 } break; |
| 2795 case InstIcmp::Uge: |
| 2796 case InstIcmp::Sge: { |
| 2797 // !(Src1RM > Src0RM) |
| 2798 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2799 Src0RM = legalizeToReg(Src0RM); |
| 2800 _movp(T, Src1RM); |
| 2801 _pcmpgt(T, Src0RM); |
| 2802 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2803 _pxor(T, MinusOne); |
| 2804 } break; |
| 2805 case InstIcmp::Ult: |
| 2806 case InstIcmp::Slt: { |
| 2807 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2808 Src0RM = legalizeToReg(Src0RM); |
| 2809 _movp(T, Src1RM); |
| 2810 _pcmpgt(T, Src0RM); |
| 2811 } break; |
| 2812 case InstIcmp::Ule: |
| 2813 case InstIcmp::Sle: { |
| 2814 // !(Src0RM > Src1RM) |
| 2815 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM)) |
| 2816 Src1RM = legalizeToReg(Src1RM); |
| 2817 _movp(T, Src0RM); |
| 2818 _pcmpgt(T, Src1RM); |
| 2819 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2820 _pxor(T, MinusOne); |
| 2821 } break; |
| 2822 } |
| 2823 |
| 2824 _movp(Dest, T); |
| 2825 eliminateNextVectorSextInstruction(Dest); |
| 2775 } | 2826 } |
| 2776 | 2827 |
| 2777 template <typename Machine> | 2828 template <typename Machine> |
| 2778 template <typename T> | 2829 template <typename T> |
| 2779 typename std::enable_if<!T::Is64Bit, void>::type | 2830 typename std::enable_if<!T::Is64Bit, void>::type |
| 2780 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { | 2831 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, |
| 2832 const Inst *Consumer) { |
| 2781 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2833 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| 2782 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2834 Operand *Src0 = legalize(Icmp->getSrc(0)); |
| 2783 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2835 Operand *Src1 = legalize(Icmp->getSrc(1)); |
| 2784 Variable *Dest = Icmp->getDest(); | 2836 Variable *Dest = Icmp->getDest(); |
| 2785 InstIcmp::ICond Condition = Icmp->getCondition(); | 2837 InstIcmp::ICond Condition = Icmp->getCondition(); |
| 2786 size_t Index = static_cast<size_t>(Condition); | 2838 size_t Index = static_cast<size_t>(Condition); |
| 2787 assert(Index < Traits::TableIcmp64Size); | 2839 assert(Index < Traits::TableIcmp64Size); |
| 2788 Operand *Src0LoRM = nullptr; | 2840 Operand *Src0LoRM = nullptr; |
| 2789 Operand *Src0HiRM = nullptr; | 2841 Operand *Src0HiRM = nullptr; |
| 2790 // Legalize the portions of Src0 that are going to be needed. | 2842 // Legalize the portions of Src0 that are going to be needed. |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2828 default: | 2880 default: |
| 2829 llvm_unreachable("unexpected condition"); | 2881 llvm_unreachable("unexpected condition"); |
| 2830 break; | 2882 break; |
| 2831 case InstIcmp::Eq: | 2883 case InstIcmp::Eq: |
| 2832 case InstIcmp::Ule: | 2884 case InstIcmp::Ule: |
| 2833 // Mov Src0HiRM first, because it was legalized most recently, and will | 2885 // Mov Src0HiRM first, because it was legalized most recently, and will |
| 2834 // sometimes avoid a move before the OR. | 2886 // sometimes avoid a move before the OR. |
| 2835 _mov(Temp, Src0HiRM); | 2887 _mov(Temp, Src0HiRM); |
| 2836 _or(Temp, Src0LoRM); | 2888 _or(Temp, Src0LoRM); |
| 2837 Context.insert(InstFakeUse::create(Func, Temp)); | 2889 Context.insert(InstFakeUse::create(Func, Temp)); |
| 2838 setccOrBr(Traits::Cond::Br_e, Dest, Br); | 2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
| 2839 return; | 2891 return; |
| 2840 case InstIcmp::Ne: | 2892 case InstIcmp::Ne: |
| 2841 case InstIcmp::Ugt: | 2893 case InstIcmp::Ugt: |
| 2842 // Mov Src0HiRM first, because it was legalized most recently, and will | 2894 // Mov Src0HiRM first, because it was legalized most recently, and will |
| 2843 // sometimes avoid a move before the OR. | 2895 // sometimes avoid a move before the OR. |
| 2844 _mov(Temp, Src0HiRM); | 2896 _mov(Temp, Src0HiRM); |
| 2845 _or(Temp, Src0LoRM); | 2897 _or(Temp, Src0LoRM); |
| 2846 Context.insert(InstFakeUse::create(Func, Temp)); | 2898 Context.insert(InstFakeUse::create(Func, Temp)); |
| 2847 setccOrBr(Traits::Cond::Br_ne, Dest, Br); | 2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
| 2848 return; | 2900 return; |
| 2849 case InstIcmp::Uge: | 2901 case InstIcmp::Uge: |
| 2850 movOrBr(true, Dest, Br); | 2902 movOrConsumer(true, Dest, Consumer); |
| 2851 return; | 2903 return; |
| 2852 case InstIcmp::Ult: | 2904 case InstIcmp::Ult: |
| 2853 movOrBr(false, Dest, Br); | 2905 movOrConsumer(false, Dest, Consumer); |
| 2854 return; | 2906 return; |
| 2855 case InstIcmp::Sgt: | 2907 case InstIcmp::Sgt: |
| 2856 break; | 2908 break; |
| 2857 case InstIcmp::Sge: | 2909 case InstIcmp::Sge: |
| 2858 _test(Src0HiRM, SignMask); | 2910 _test(Src0HiRM, SignMask); |
| 2859 setccOrBr(Traits::Cond::Br_e, Dest, Br); | 2911 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer); |
| 2860 return; | 2912 return; |
| 2861 case InstIcmp::Slt: | 2913 case InstIcmp::Slt: |
| 2862 _test(Src0HiRM, SignMask); | 2914 _test(Src0HiRM, SignMask); |
| 2863 setccOrBr(Traits::Cond::Br_ne, Dest, Br); | 2915 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer); |
| 2864 return; | 2916 return; |
| 2865 case InstIcmp::Sle: | 2917 case InstIcmp::Sle: |
| 2866 break; | 2918 break; |
| 2867 } | 2919 } |
| 2868 } | 2920 } |
| 2869 // Handle general compares. | 2921 // Handle general compares. |
| 2870 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 2922 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 2871 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 2923 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 2872 if (Br == nullptr) { | 2924 if (Consumer == nullptr) { |
| 2873 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); | 2925 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); |
| 2874 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); | 2926 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); |
| 2875 typename Traits::Insts::Label *LabelFalse = | 2927 typename Traits::Insts::Label *LabelFalse = |
| 2876 Traits::Insts::Label::create(Func, this); | 2928 Traits::Insts::Label::create(Func, this); |
| 2877 typename Traits::Insts::Label *LabelTrue = | 2929 typename Traits::Insts::Label *LabelTrue = |
| 2878 Traits::Insts::Label::create(Func, this); | 2930 Traits::Insts::Label::create(Func, this); |
| 2879 _mov(Dest, One); | 2931 _mov(Dest, One); |
| 2880 _cmp(Src0HiRM, Src1HiRI); | 2932 _cmp(Src0HiRM, Src1HiRI); |
| 2881 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2933 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2882 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 2934 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 2883 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2935 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2884 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 2936 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 2885 _cmp(Src0LoRM, Src1LoRI); | 2937 _cmp(Src0LoRM, Src1LoRI); |
| 2886 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 2938 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 2887 Context.insert(LabelFalse); | 2939 Context.insert(LabelFalse); |
| 2888 _mov_redefined(Dest, Zero); | 2940 _redefined(_mov(Dest, Zero)); |
| 2889 Context.insert(LabelTrue); | 2941 Context.insert(LabelTrue); |
| 2890 } else { | 2942 return; |
| 2943 } |
| 2944 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 2891 _cmp(Src0HiRM, Src1HiRI); | 2945 _cmp(Src0HiRM, Src1HiRI); |
| 2892 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | 2946 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2893 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); | 2947 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); |
| 2894 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | 2948 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2895 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); | 2949 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); |
| 2896 _cmp(Src0LoRM, Src1LoRI); | 2950 _cmp(Src0LoRM, Src1LoRI); |
| 2897 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), | 2951 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), |
| 2898 Br->getTargetFalse()); | 2952 Br->getTargetFalse()); |
| 2953 return; |
| 2899 } | 2954 } |
| 2955 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2956 Operand *SrcT = Select->getTrueOperand(); |
| 2957 Operand *SrcF = Select->getFalseOperand(); |
| 2958 Variable *SelectDest = Select->getDest(); |
| 2959 typename Traits::Insts::Label *LabelFalse = |
| 2960 Traits::Insts::Label::create(Func, this); |
| 2961 typename Traits::Insts::Label *LabelTrue = |
| 2962 Traits::Insts::Label::create(Func, this); |
| 2963 lowerMove(SelectDest, SrcT, false); |
| 2964 _cmp(Src0HiRM, Src1HiRI); |
| 2965 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2966 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 2967 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2968 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 2969 _cmp(Src0LoRM, Src1LoRI); |
| 2970 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 2971 Context.insert(LabelFalse); |
| 2972 static constexpr bool IsRedefinition = true; |
| 2973 lowerMove(SelectDest, SrcF, IsRedefinition); |
| 2974 Context.insert(LabelTrue); |
| 2975 return; |
| 2976 } |
| 2977 llvm::report_fatal_error("Unexpected consumer type"); |
| 2900 } | 2978 } |
| 2901 | 2979 |
| 2902 template <class Machine> | 2980 template <class Machine> |
| 2903 void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition, | 2981 void TargetX86Base<Machine>::setccOrConsumer( |
| 2904 Variable *Dest, const InstBr *Br) { | 2982 typename Traits::Cond::BrCond Condition, Variable *Dest, |
| 2905 if (Br == nullptr) { | 2983 const Inst *Consumer) { |
| 2984 if (Consumer == nullptr) { |
| 2906 _setcc(Dest, Condition); | 2985 _setcc(Dest, Condition); |
| 2907 } else { | 2986 return; |
| 2987 } |
| 2988 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 2908 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); | 2989 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); |
| 2990 return; |
| 2909 } | 2991 } |
| 2992 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 2993 Operand *SrcT = Select->getTrueOperand(); |
| 2994 Operand *SrcF = Select->getFalseOperand(); |
| 2995 Variable *SelectDest = Select->getDest(); |
| 2996 lowerSelectMove(SelectDest, Condition, SrcT, SrcF); |
| 2997 return; |
| 2998 } |
| 2999 llvm::report_fatal_error("Unexpected consumer type"); |
| 2910 } | 3000 } |
| 2911 | 3001 |
| 2912 template <class Machine> | 3002 template <class Machine> |
| 2913 void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest, | 3003 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest, |
| 2914 const InstBr *Br) { | 3004 const Inst *Consumer) { |
| 2915 if (Br == nullptr) { | 3005 if (Consumer == nullptr) { |
| 2916 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3006 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| 2917 } else { | 3007 return; |
| 3008 } |
| 3009 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 2918 // TODO(sehr,stichnot): This could be done with a single unconditional | 3010 // TODO(sehr,stichnot): This could be done with a single unconditional |
| 2919 // branch instruction, but subzero doesn't know how to handle the resulting | 3011 // branch instruction, but subzero doesn't know how to handle the resulting |
| 2920 // control flow graph changes now. Make it do so to eliminate mov and cmp. | 3012 // control flow graph changes now. Make it do so to eliminate mov and cmp. |
| 2921 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3013 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
| 2922 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); | 3014 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); |
| 2923 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3015 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3016 return; |
| 2924 } | 3017 } |
| 3018 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) { |
| 3019 Operand *Src = nullptr; |
| 3020 if (IcmpResult) { |
| 3021 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm); |
| 3022 } else { |
| 3023 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm); |
| 3024 } |
| 3025 Variable *SelectDest = Select->getDest(); |
| 3026 lowerMove(SelectDest, Src, false); |
| 3027 return; |
| 3028 } |
| 3029 llvm::report_fatal_error("Unexpected consumer type"); |
| 2925 } | 3030 } |
| 2926 | 3031 |
| 2927 template <class Machine> | 3032 template <class Machine> |
| 2928 void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, | 3033 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith, |
| 2929 const InstBr *Br) { | 3034 const Inst *Consumer) { |
| 2930 Variable *T = nullptr; | 3035 Variable *T = nullptr; |
| 2931 Operand *Src0 = legalize(Arith->getSrc(0)); | 3036 Operand *Src0 = legalize(Arith->getSrc(0)); |
| 2932 Operand *Src1 = legalize(Arith->getSrc(1)); | 3037 Operand *Src1 = legalize(Arith->getSrc(1)); |
| 2933 Variable *Dest = Arith->getDest(); | 3038 Variable *Dest = Arith->getDest(); |
| 2934 switch (Arith->getOp()) { | 3039 switch (Arith->getOp()) { |
| 2935 default: | 3040 default: |
| 2936 llvm_unreachable("arithmetic operator not AND or OR"); | 3041 llvm_unreachable("arithmetic operator not AND or OR"); |
| 2937 break; | 3042 break; |
| 2938 case InstArithmetic::And: | 3043 case InstArithmetic::And: |
| 2939 _mov(T, Src0); | 3044 _mov(T, Src0); |
| 2940 // Test cannot have an address in the second position. Since T is | 3045 // Test cannot have an address in the second position. Since T is |
| 2941 // guaranteed to be a register and Src1 could be a memory load, ensure | 3046 // guaranteed to be a register and Src1 could be a memory load, ensure |
| 2942 // that the second argument is a register. | 3047 // that the second argument is a register. |
| 2943 if (llvm::isa<Constant>(Src1)) | 3048 if (llvm::isa<Constant>(Src1)) |
| 2944 _test(T, Src1); | 3049 _test(T, Src1); |
| 2945 else | 3050 else |
| 2946 _test(Src1, T); | 3051 _test(Src1, T); |
| 2947 break; | 3052 break; |
| 2948 case InstArithmetic::Or: | 3053 case InstArithmetic::Or: |
| 2949 _mov(T, Src0); | 3054 _mov(T, Src0); |
| 2950 _or(T, Src1); | 3055 _or(T, Src1); |
| 2951 break; | 3056 break; |
| 2952 } | 3057 } |
| 2953 Context.insert(InstFakeUse::create(Func, T)); | 3058 |
| 2954 Context.insert(InstFakeDef::create(Func, Dest)); | 3059 if (Consumer == nullptr) { |
| 2955 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3060 llvm::report_fatal_error("Expected a consumer instruction"); |
| 3061 } |
| 3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) { |
| 3063 Context.insert(InstFakeUse::create(Func, T)); |
| 3064 Context.insert(InstFakeDef::create(Func, Dest)); |
| 3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
| 3066 return; |
| 3067 } |
| 3068 llvm::report_fatal_error("Unexpected consumer type"); |
| 2956 } | 3069 } |
| 2957 | 3070 |
| 2958 template <class Machine> | 3071 template <class Machine> |
| 2959 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| 2960 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3073 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2961 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 2962 ConstantInteger32 *ElementIndex = | 3075 ConstantInteger32 *ElementIndex = |
| 2963 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3076 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 2964 // Only constant indices are allowed in PNaCl IR. | 3077 // Only constant indices are allowed in PNaCl IR. |
| 2965 assert(ElementIndex); | 3078 assert(ElementIndex); |
| (...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3428 case Intrinsics::Stacksave: { | 3541 case Intrinsics::Stacksave: { |
| 3429 Variable *esp = | 3542 Variable *esp = |
| 3430 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 3543 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 3431 Variable *Dest = Instr->getDest(); | 3544 Variable *Dest = Instr->getDest(); |
| 3432 _mov(Dest, esp); | 3545 _mov(Dest, esp); |
| 3433 return; | 3546 return; |
| 3434 } | 3547 } |
| 3435 case Intrinsics::Stackrestore: { | 3548 case Intrinsics::Stackrestore: { |
| 3436 Variable *esp = | 3549 Variable *esp = |
| 3437 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 3550 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 3438 _mov_redefined(esp, Instr->getArg(0)); | 3551 _redefined(_mov(esp, Instr->getArg(0))); |
| 3439 return; | 3552 return; |
| 3440 } | 3553 } |
| 3441 case Intrinsics::Trap: | 3554 case Intrinsics::Trap: |
| 3442 _ud2(); | 3555 _ud2(); |
| 3443 return; | 3556 return; |
| 3444 case Intrinsics::UnknownIntrinsic: | 3557 case Intrinsics::UnknownIntrinsic: |
| 3445 Func->setError("Should not be lowering UnknownIntrinsic"); | 3558 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3446 return; | 3559 return; |
| 3447 } | 3560 } |
| 3448 return; | 3561 return; |
| (...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4610 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 4723 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
| 4611 } | 4724 } |
| 4612 } | 4725 } |
| 4613 | 4726 |
| 4614 template <class Machine> | 4727 template <class Machine> |
| 4615 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | 4728 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { |
| 4616 Func->setError("Phi found in regular instruction list"); | 4729 Func->setError("Phi found in regular instruction list"); |
| 4617 } | 4730 } |
| 4618 | 4731 |
| 4619 template <class Machine> | 4732 template <class Machine> |
| 4620 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { | 4733 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) { |
| 4621 Variable *Dest = Inst->getDest(); | 4734 Variable *Dest = Select->getDest(); |
| 4622 Type DestTy = Dest->getType(); | |
| 4623 Operand *SrcT = Inst->getTrueOperand(); | |
| 4624 Operand *SrcF = Inst->getFalseOperand(); | |
| 4625 Operand *Condition = Inst->getCondition(); | |
| 4626 | 4735 |
| 4627 if (isVectorType(DestTy)) { | 4736 if (isVectorType(Dest->getType())) { |
| 4628 Type SrcTy = SrcT->getType(); | 4737 lowerSelectVector(Select); |
| 4629 Variable *T = makeReg(SrcTy); | |
| 4630 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | |
| 4631 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | |
| 4632 if (InstructionSet >= Traits::SSE4_1) { | |
| 4633 // TODO(wala): If the condition operand is a constant, use blendps or | |
| 4634 // pblendw. | |
| 4635 // | |
| 4636 // Use blendvps or pblendvb to implement select. | |
| 4637 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | |
| 4638 SrcTy == IceType_v4f32) { | |
| 4639 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4640 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); | |
| 4641 _movp(xmm0, ConditionRM); | |
| 4642 _psll(xmm0, Ctx->getConstantInt8(31)); | |
| 4643 _movp(T, SrcFRM); | |
| 4644 _blendvps(T, SrcTRM, xmm0); | |
| 4645 _movp(Dest, T); | |
| 4646 } else { | |
| 4647 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | |
| 4648 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | |
| 4649 : IceType_v16i8; | |
| 4650 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0); | |
| 4651 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | |
| 4652 _movp(T, SrcFRM); | |
| 4653 _pblendvb(T, SrcTRM, xmm0); | |
| 4654 _movp(Dest, T); | |
| 4655 } | |
| 4656 return; | |
| 4657 } | |
| 4658 // Lower select without Traits::SSE4.1: | |
| 4659 // a=d?b:c ==> | |
| 4660 // if elementtype(d) != i1: | |
| 4661 // d=sext(d); | |
| 4662 // a=(b&d)|(c&~d); | |
| 4663 Variable *T2 = makeReg(SrcTy); | |
| 4664 // Sign extend the condition operand if applicable. | |
| 4665 if (SrcTy == IceType_v4f32) { | |
| 4666 // The sext operation takes only integer arguments. | |
| 4667 Variable *T3 = Func->makeVariable(IceType_v4i32); | |
| 4668 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | |
| 4669 _movp(T, T3); | |
| 4670 } else if (typeElementType(SrcTy) != IceType_i1) { | |
| 4671 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | |
| 4672 } else { | |
| 4673 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4674 _movp(T, ConditionRM); | |
| 4675 } | |
| 4676 _movp(T2, T); | |
| 4677 _pand(T, SrcTRM); | |
| 4678 _pandn(T2, SrcFRM); | |
| 4679 _por(T, T2); | |
| 4680 _movp(Dest, T); | |
| 4681 | |
| 4682 return; | 4738 return; |
| 4683 } | 4739 } |
| 4684 | 4740 |
| 4685 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; | 4741 Operand *Condition = Select->getCondition(); |
| 4686 Operand *CmpOpnd0 = nullptr; | |
| 4687 Operand *CmpOpnd1 = nullptr; | |
| 4688 // Handle folding opportunities. | 4742 // Handle folding opportunities. |
| 4689 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4743 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| 4690 assert(Producer->isDeleted()); | 4744 assert(Producer->isDeleted()); |
| 4691 switch (BoolFolding::getProducerKind(Producer)) { | 4745 switch (BoolFolding::getProducerKind(Producer)) { |
| 4692 default: | 4746 default: |
| 4693 break; | 4747 break; |
| 4694 case BoolFolding::PK_Icmp32: { | 4748 case BoolFolding::PK_Icmp32: |
| 4695 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 4749 case BoolFolding::PK_Icmp64: { |
| 4696 Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); | 4750 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Select); |
| 4697 CmpOpnd1 = legalize(Producer->getSrc(1)); | 4751 return; |
| 4698 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); | 4752 } |
| 4699 } break; | 4753 case BoolFolding::PK_Fcmp: { |
| 4754 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Select); |
| 4755 return; |
| 4756 } |
| 4700 } | 4757 } |
| 4701 } | 4758 } |
| 4702 if (CmpOpnd0 == nullptr) { | |
| 4703 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4704 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | |
| 4705 } | |
| 4706 assert(CmpOpnd0); | |
| 4707 assert(CmpOpnd1); | |
| 4708 | 4759 |
| 4709 _cmp(CmpOpnd0, CmpOpnd1); | 4760 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4761 Operand *Zero = Ctx->getConstantZero(IceType_i32); |
| 4762 _cmp(CmpResult, Zero); |
| 4763 Operand *SrcT = Select->getTrueOperand(); |
| 4764 Operand *SrcF = Select->getFalseOperand(); |
| 4765 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; |
| 4766 lowerSelectMove(Dest, Cond, SrcT, SrcF); |
| 4767 } |
| 4768 |
| 4769 template <class Machine> |
| 4770 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest, |
| 4771 typename Traits::Cond::BrCond Cond, |
| 4772 Operand *SrcT, Operand *SrcF) { |
| 4773 Type DestTy = Dest->getType(); |
| 4710 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4774 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
| 4711 // The cmov instruction doesn't allow 8-bit or FP operands, so we need | 4775 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
| 4712 // explicit control flow. | 4776 // explicit control flow. |
| 4713 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4777 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
| 4714 typename Traits::Insts::Label *Label = | 4778 typename Traits::Insts::Label *Label = |
| 4715 Traits::Insts::Label::create(Func, this); | 4779 Traits::Insts::Label::create(Func, this); |
| 4716 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4780 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
| 4717 _mov(Dest, SrcT); | 4781 _mov(Dest, SrcT); |
| 4718 _br(Cond, Label); | 4782 _br(Cond, Label); |
| 4719 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4783 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
| 4720 _mov_redefined(Dest, SrcF); | 4784 _redefined(_mov(Dest, SrcF)); |
| 4721 Context.insert(Label); | 4785 Context.insert(Label); |
| 4722 return; | 4786 return; |
| 4723 } | 4787 } |
| 4724 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4788 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| 4725 // But if SrcT is immediate, we might be able to do better, as the cmov | 4789 // But if SrcT is immediate, we might be able to do better, as the cmov |
| 4726 // instruction doesn't allow an immediate operand: | 4790 // instruction doesn't allow an immediate operand: |
| 4727 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4791 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
| 4728 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4792 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
| 4729 std::swap(SrcT, SrcF); | 4793 std::swap(SrcT, SrcF); |
| 4730 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4794 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); |
| 4731 } | 4795 } |
| 4732 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 4796 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 4733 SrcT = legalizeUndef(SrcT); | 4797 SrcT = legalizeUndef(SrcT); |
| 4734 SrcF = legalizeUndef(SrcF); | 4798 SrcF = legalizeUndef(SrcF); |
| 4735 // Set the low portion. | 4799 // Set the low portion. |
| 4736 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4800 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4737 Variable *TLo = nullptr; | 4801 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF)); |
| 4738 Operand *SrcFLo = legalize(loOperand(SrcF)); | |
| 4739 _mov(TLo, SrcFLo); | |
| 4740 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); | |
| 4741 _cmov(TLo, SrcTLo, Cond); | |
| 4742 _mov(DestLo, TLo); | |
| 4743 // Set the high portion. | 4802 // Set the high portion. |
| 4744 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4803 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4745 Variable *THi = nullptr; | 4804 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF)); |
| 4746 Operand *SrcFHi = legalize(hiOperand(SrcF)); | |
| 4747 _mov(THi, SrcFHi); | |
| 4748 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); | |
| 4749 _cmov(THi, SrcTHi, Cond); | |
| 4750 _mov(DestHi, THi); | |
| 4751 return; | 4805 return; |
| 4752 } | 4806 } |
| 4753 | 4807 |
| 4754 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || | 4808 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
| 4755 (Traits::Is64Bit && DestTy == IceType_i64)); | 4809 (Traits::Is64Bit && DestTy == IceType_i64)); |
| 4810 lowerSelectIntMove(Dest, Cond, SrcT, SrcF); |
| 4811 } |
| 4812 |
| 4813 template <class Machine> |
| 4814 void TargetX86Base<Machine>::lowerSelectIntMove( |
| 4815 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT, |
| 4816 Operand *SrcF) { |
| 4756 Variable *T = nullptr; | 4817 Variable *T = nullptr; |
| 4757 SrcF = legalize(SrcF); | 4818 SrcF = legalize(SrcF); |
| 4758 _mov(T, SrcF); | 4819 _mov(T, SrcF); |
| 4759 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4820 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4760 _cmov(T, SrcT, Cond); | 4821 _cmov(T, SrcT, Cond); |
| 4761 _mov(Dest, T); | 4822 _mov(Dest, T); |
| 4762 } | 4823 } |
| 4763 | 4824 |
| 4764 template <class Machine> | 4825 template <class Machine> |
| 4826 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src, |
| 4827 bool IsRedefinition) { |
| 4828 assert(Dest->getType() == Src->getType()); |
| 4829 assert(!Dest->isRematerializable()); |
| 4830 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 4831 Src = legalize(Src); |
| 4832 Operand *SrcLo = loOperand(Src); |
| 4833 Operand *SrcHi = hiOperand(Src); |
| 4834 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4835 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4836 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 4837 _mov(T_Lo, SrcLo); |
| 4838 _redefined(_mov(DestLo, T_Lo), IsRedefinition); |
| 4839 _mov(T_Hi, SrcHi); |
| 4840 _redefined(_mov(DestHi, T_Hi), IsRedefinition); |
| 4841 } else { |
| 4842 Operand *SrcLegal; |
| 4843 if (Dest->hasReg()) { |
| 4844 // If Dest already has a physical register, then only basic legalization |
| 4845 // is needed, as the source operand can be a register, immediate, or |
| 4846 // memory. |
| 4847 SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum()); |
| 4848 } else { |
| 4849 // If Dest could be a stack operand, then SrcLegal must be a physical |
| 4850 // register or a scalar integer immediate. |
| 4851 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm); |
| 4852 } |
| 4853 if (isVectorType(Dest->getType())) { |
| 4854 _redefined(_movp(Dest, SrcLegal), IsRedefinition); |
| 4855 } else { |
| 4856 _redefined(_mov(Dest, SrcLegal), IsRedefinition); |
| 4857 } |
| 4858 } |
| 4859 } |
| 4860 |
| 4861 template <class Machine> |
| 4862 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp, |
| 4863 const InstSelect *Select) { |
| 4864 Operand *CmpSrc0 = Fcmp->getSrc(0); |
| 4865 Operand *CmpSrc1 = Fcmp->getSrc(1); |
| 4866 Operand *SelectSrcT = Select->getTrueOperand(); |
| 4867 Operand *SelectSrcF = Select->getFalseOperand(); |
| 4868 |
| 4869 if (CmpSrc0->getType() != SelectSrcT->getType()) |
| 4870 return false; |
| 4871 |
| 4872 // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here. |
| 4873 InstFcmp::FCond Condition = Fcmp->getCondition(); |
| 4874 switch (Condition) { |
| 4875 default: |
| 4876 return false; |
| 4877 case InstFcmp::True: |
| 4878 case InstFcmp::False: |
| 4879 case InstFcmp::Ogt: |
| 4880 case InstFcmp::Olt: |
| 4881 (void)CmpSrc0; |
| 4882 (void)CmpSrc1; |
| 4883 (void)SelectSrcT; |
| 4884 (void)SelectSrcF; |
| 4885 break; |
| 4886 } |
| 4887 return false; |
| 4888 } |
| 4889 |
| 4890 template <class Machine> |
| 4891 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) { |
| 4892 Variable *Dest = Icmp->getDest(); |
| 4893 if (isVectorType(Dest->getType())) { |
| 4894 lowerIcmpVector(Icmp); |
| 4895 } else { |
| 4896 constexpr Inst *Consumer = nullptr; |
| 4897 lowerIcmpAndConsumer(Icmp, Consumer); |
| 4898 } |
| 4899 } |
| 4900 |
| 4901 template <class Machine> |
| 4902 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) { |
| 4903 Variable *Dest = Inst->getDest(); |
| 4904 Type DestTy = Dest->getType(); |
| 4905 Operand *SrcT = Inst->getTrueOperand(); |
| 4906 Operand *SrcF = Inst->getFalseOperand(); |
| 4907 Operand *Condition = Inst->getCondition(); |
| 4908 |
| 4909 if (!isVectorType(DestTy)) |
| 4910 llvm::report_fatal_error("Expected a vector select"); |
| 4911 |
| 4912 Type SrcTy = SrcT->getType(); |
| 4913 Variable *T = makeReg(SrcTy); |
| 4914 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4915 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 4916 if (InstructionSet >= Traits::SSE4_1) { |
| 4917 // TODO(wala): If the condition operand is a constant, use blendps or |
| 4918 // pblendw. |
| 4919 // |
| 4920 // Use blendvps or pblendvb to implement select. |
| 4921 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 4922 SrcTy == IceType_v4f32) { |
| 4923 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4924 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); |
| 4925 _movp(xmm0, ConditionRM); |
| 4926 _psll(xmm0, Ctx->getConstantInt8(31)); |
| 4927 _movp(T, SrcFRM); |
| 4928 _blendvps(T, SrcTRM, xmm0); |
| 4929 _movp(Dest, T); |
| 4930 } else { |
| 4931 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 4932 Type SignExtTy = |
| 4933 Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8; |
| 4934 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0); |
| 4935 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 4936 _movp(T, SrcFRM); |
| 4937 _pblendvb(T, SrcTRM, xmm0); |
| 4938 _movp(Dest, T); |
| 4939 } |
| 4940 return; |
| 4941 } |
| 4942 // Lower select without Traits::SSE4.1: |
| 4943 // a=d?b:c ==> |
| 4944 // if elementtype(d) != i1: |
| 4945 // d=sext(d); |
| 4946 // a=(b&d)|(c&~d); |
| 4947 Variable *T2 = makeReg(SrcTy); |
| 4948 // Sign extend the condition operand if applicable. |
| 4949 if (SrcTy == IceType_v4f32) { |
| 4950 // The sext operation takes only integer arguments. |
| 4951 Variable *T3 = Func->makeVariable(IceType_v4i32); |
| 4952 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| 4953 _movp(T, T3); |
| 4954 } else if (typeElementType(SrcTy) != IceType_i1) { |
| 4955 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
| 4956 } else { |
| 4957 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4958 _movp(T, ConditionRM); |
| 4959 } |
| 4960 _movp(T2, T); |
| 4961 _pand(T, SrcTRM); |
| 4962 _pandn(T2, SrcFRM); |
| 4963 _por(T, T2); |
| 4964 _movp(Dest, T); |
| 4965 |
| 4966 return; |
| 4967 } |
| 4968 |
| 4969 template <class Machine> |
| 4765 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | 4970 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { |
| 4766 Operand *Value = Inst->getData(); | 4971 Operand *Value = Inst->getData(); |
| 4767 Operand *Addr = Inst->getAddr(); | 4972 Operand *Addr = Inst->getAddr(); |
| 4768 typename Traits::X86OperandMem *NewAddr = | 4973 typename Traits::X86OperandMem *NewAddr = |
| 4769 formMemoryOperand(Addr, Value->getType()); | 4974 formMemoryOperand(Addr, Value->getType()); |
| 4770 doMockBoundsCheck(NewAddr); | 4975 doMockBoundsCheck(NewAddr); |
| 4771 Type Ty = NewAddr->getType(); | 4976 Type Ty = NewAddr->getType(); |
| 4772 | 4977 |
| 4773 if (!Traits::Is64Bit && Ty == IceType_i64) { | 4978 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 4774 Value = legalizeUndef(Value); | 4979 Value = legalizeUndef(Value); |
| (...skipping 738 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5513 case IceType_i8: | 5718 case IceType_i8: |
| 5514 case IceType_i16: | 5719 case IceType_i16: |
| 5515 case IceType_i32: | 5720 case IceType_i32: |
| 5516 case IceType_i64: | 5721 case IceType_i64: |
| 5517 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. | 5722 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. |
| 5518 _mov(Reg, Ctx->getConstantZero(Ty)); | 5723 _mov(Reg, Ctx->getConstantZero(Ty)); |
| 5519 break; | 5724 break; |
| 5520 case IceType_f32: | 5725 case IceType_f32: |
| 5521 case IceType_f64: | 5726 case IceType_f64: |
| 5522 Context.insert(InstFakeDef::create(Func, Reg)); | 5727 Context.insert(InstFakeDef::create(Func, Reg)); |
| 5523 // TODO(stichnot): Use xorps/xorpd instead of pxor. | 5728 _xorps(Reg, Reg); |
| 5524 _pxor(Reg, Reg); | |
| 5525 break; | 5729 break; |
| 5526 default: | 5730 default: |
| 5527 // All vector types use the same pxor instruction. | 5731 // All vector types use the same pxor instruction. |
| 5528 assert(isVectorType(Ty)); | 5732 assert(isVectorType(Ty)); |
| 5529 Context.insert(InstFakeDef::create(Func, Reg)); | 5733 Context.insert(InstFakeDef::create(Func, Reg)); |
| 5530 _pxor(Reg, Reg); | 5734 _pxor(Reg, Reg); |
| 5531 break; | 5735 break; |
| 5532 } | 5736 } |
| 5533 return Reg; | 5737 return Reg; |
| 5534 } | 5738 } |
| (...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6206 } | 6410 } |
| 6207 // the offset is not eligible for blinding or pooling, return the original | 6411 // the offset is not eligible for blinding or pooling, return the original |
| 6208 // mem operand | 6412 // mem operand |
| 6209 return MemOperand; | 6413 return MemOperand; |
| 6210 } | 6414 } |
| 6211 | 6415 |
| 6212 } // end of namespace X86Internal | 6416 } // end of namespace X86Internal |
| 6213 } // end of namespace Ice | 6417 } // end of namespace Ice |
| 6214 | 6418 |
| 6215 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |