Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1497033002: Fuse icmp/fcmp with select (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 92
93 private: 93 private:
94 BoolFolding(const BoolFolding &) = delete; 94 BoolFolding(const BoolFolding &) = delete;
95 BoolFolding &operator=(const BoolFolding &) = delete; 95 BoolFolding &operator=(const BoolFolding &) = delete;
96 96
97 public: 97 public:
98 BoolFolding() = default; 98 BoolFolding() = default;
99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
101 static bool hasComplexLowering(const Inst *Instr); 101 static bool hasComplexLowering(const Inst *Instr);
102 static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
103 BoolFoldingConsumerKind ConsumerKind);
102 void init(CfgNode *Node); 104 void init(CfgNode *Node);
103 const Inst *getProducerFor(const Operand *Opnd) const; 105 const Inst *getProducerFor(const Operand *Opnd) const;
104 void dump(const Cfg *Func) const; 106 void dump(const Cfg *Func) const;
105 107
106 private: 108 private:
107 /// Returns true if Producers contains a valid entry for the given VarNum. 109 /// Returns true if Producers contains a valid entry for the given VarNum.
108 bool containsValid(SizeT VarNum) const { 110 bool containsValid(SizeT VarNum) const {
109 auto Element = Producers.find(VarNum); 111 auto Element = Producers.find(VarNum);
110 return Element != Producers.end() && Element->second.Instr != nullptr; 112 return Element != Producers.end() && Element->second.Instr != nullptr;
111 } 113 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 return false; 188 return false;
187 case PK_Icmp64: 189 case PK_Icmp64:
188 return true; 190 return true;
189 case PK_Fcmp: 191 case PK_Fcmp:
190 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
191 .C2 != MachineTraits::Cond::Br_None; 193 .C2 != MachineTraits::Cond::Br_None;
192 } 194 }
193 } 195 }
194 196
195 template <class MachineTraits> 197 template <class MachineTraits>
198 bool BoolFolding<MachineTraits>::isValidFolding(
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
201 switch (ProducerKind) {
202 default:
203 return false;
204 case PK_Icmp32:
205 case PK_Icmp64:
206 case PK_Fcmp:
207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
208 case PK_Arith:
209 return ConsumerKind == CK_Br;
210 }
211 }
212
213 template <class MachineTraits>
196 void BoolFolding<MachineTraits>::init(CfgNode *Node) { 214 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
197 Producers.clear(); 215 Producers.clear();
198 for (Inst &Instr : Node->getInsts()) { 216 for (Inst &Instr : Node->getInsts()) {
199 // Check whether Instr is a valid producer. 217 // Check whether Instr is a valid producer.
200 Variable *Var = Instr.getDest(); 218 Variable *Var = Instr.getDest();
201 if (!Instr.isDeleted() // only consider non-deleted instructions 219 if (!Instr.isDeleted() // only consider non-deleted instructions
202 && Var // only instructions with an actual dest var 220 && Var // only instructions with an actual dest var
203 && Var->getType() == IceType_i1 // only bool-type dest vars 221 && Var->getType() == IceType_i1 // only bool-type dest vars
204 && getProducerKind(&Instr) != PK_None) { // white-listed instructions 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
205 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); 223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
206 } 224 }
207 // Check each src variable against the map. 225 // Check each src variable against the map.
208 FOREACH_VAR_IN_INST(Var, Instr) { 226 FOREACH_VAR_IN_INST(Var, Instr) {
209 SizeT VarNum = Var->getIndex(); 227 SizeT VarNum = Var->getIndex();
210 if (containsValid(VarNum)) { 228 if (!containsValid(VarNum))
211 if (IndexOfVarOperandInInst(Var) != 229 continue;
212 0 // All valid consumers use Var as the first source operand 230 // All valid consumers use Var as the first source operand
213 || 231 if (IndexOfVarOperandInInst(Var) != 0) {
214 getConsumerKind(&Instr) == CK_None // must be white-listed 232 setInvalid(VarNum);
215 || 233 continue;
216 (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch 234 }
217 getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) || 235 // Consumer instructions must be white-listed
218 (Producers[VarNum].IsComplex && // complex can't be multi-use 236 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
219 Producers[VarNum].NumUses > 0)) { 237 ConsumerKind = getConsumerKind(&Instr);
220 setInvalid(VarNum); 238 if (ConsumerKind == CK_None) {
221 continue; 239 setInvalid(VarNum);
222 } 240 continue;
223 ++Producers[VarNum].NumUses; 241 }
224 if (Instr.isLastUse(Var)) { 242 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
225 Producers[VarNum].IsLiveOut = false; 243 ProducerKind = getProducerKind(Producers[VarNum].Instr);
226 } 244 if (!isValidFolding(ProducerKind, ConsumerKind)) {
245 setInvalid(VarNum);
246 continue;
247 }
248 // Avoid creating multiple copies of complex producer instructions.
249 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
250 setInvalid(VarNum);
251 continue;
252 }
253 ++Producers[VarNum].NumUses;
254 if (Instr.isLastUse(Var)) {
255 Producers[VarNum].IsLiveOut = false;
227 } 256 }
228 } 257 }
229 } 258 }
230 for (auto &I : Producers) { 259 for (auto &I : Producers) {
231 // Ignore entries previously marked invalid. 260 // Ignore entries previously marked invalid.
232 if (I.second.Instr == nullptr) 261 if (I.second.Instr == nullptr)
233 continue; 262 continue;
234 // Disable the producer if its dest may be live beyond this block. 263 // Disable the producer if its dest may be live beyond this block.
235 if (I.second.IsLiveOut) { 264 if (I.second.IsLiveOut) {
236 setInvalid(I.first); 265 setInvalid(I.first);
(...skipping 1027 matching lines...) Expand 10 before | Expand all | Expand 10 after
1264 // test t1, 0x20 1293 // test t1, 0x20
1265 // je L1 1294 // je L1
1266 // use(t3) 1295 // use(t3)
1267 // t3 = t2 1296 // t3 = t2
1268 // t2 = 0 1297 // t2 = 0
1269 _shld(T_3, T_2, T_1); 1298 _shld(T_3, T_2, T_1);
1270 _shl(T_2, T_1); 1299 _shl(T_2, T_1);
1271 _test(T_1, BitTest); 1300 _test(T_1, BitTest);
1272 _br(Traits::Cond::Br_e, Label); 1301 _br(Traits::Cond::Br_e, Label);
1273 // T_2 and T_3 are being assigned again because of the intra-block control 1302 // T_2 and T_3 are being assigned again because of the intra-block control
1274 // flow, so we need the _mov_redefined variant to avoid liveness problems. 1303 // flow, so we need to use _redefined to avoid liveness problems.
1275 _mov_redefined(T_3, T_2); 1304 _redefined(_mov(T_3, T_2));
1276 _mov_redefined(T_2, Zero); 1305 _redefined(_mov(T_2, Zero));
1277 } break; 1306 } break;
1278 case InstArithmetic::Lshr: { 1307 case InstArithmetic::Lshr: {
1279 // a=b>>c (unsigned) ==> 1308 // a=b>>c (unsigned) ==>
1280 // t2 = shrd t2, t3, t1 1309 // t2 = shrd t2, t3, t1
1281 // t3 = shr t3, t1 1310 // t3 = shr t3, t1
1282 // test t1, 0x20 1311 // test t1, 0x20
1283 // je L1 1312 // je L1
1284 // use(t2) 1313 // use(t2)
1285 // t2 = t3 1314 // t2 = t3
1286 // t3 = 0 1315 // t3 = 0
1287 _shrd(T_2, T_3, T_1); 1316 _shrd(T_2, T_3, T_1);
1288 _shr(T_3, T_1); 1317 _shr(T_3, T_1);
1289 _test(T_1, BitTest); 1318 _test(T_1, BitTest);
1290 _br(Traits::Cond::Br_e, Label); 1319 _br(Traits::Cond::Br_e, Label);
1291 // T_2 and T_3 are being assigned again because of the intra-block control 1320 // T_2 and T_3 are being assigned again because of the intra-block control
1292 // flow, so we need the _mov_redefined variant to avoid liveness problems. 1321 // flow, so we need to use _redefined to avoid liveness problems.
1293 _mov_redefined(T_2, T_3); 1322 _redefined(_mov(T_2, T_3));
1294 _mov_redefined(T_3, Zero); 1323 _redefined(_mov(T_3, Zero));
1295 } break; 1324 } break;
1296 case InstArithmetic::Ashr: { 1325 case InstArithmetic::Ashr: {
1297 // a=b>>c (signed) ==> 1326 // a=b>>c (signed) ==>
1298 // t2 = shrd t2, t3, t1 1327 // t2 = shrd t2, t3, t1
1299 // t3 = sar t3, t1 1328 // t3 = sar t3, t1
1300 // test t1, 0x20 1329 // test t1, 0x20
1301 // je L1 1330 // je L1
1302 // use(t2) 1331 // use(t2)
1303 // t2 = t3 1332 // t2 = t3
1304 // t3 = sar t3, 0x1f 1333 // t3 = sar t3, 0x1f
1305 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1334 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1306 _shrd(T_2, T_3, T_1); 1335 _shrd(T_2, T_3, T_1);
1307 _sar(T_3, T_1); 1336 _sar(T_3, T_1);
1308 _test(T_1, BitTest); 1337 _test(T_1, BitTest);
1309 _br(Traits::Cond::Br_e, Label); 1338 _br(Traits::Cond::Br_e, Label);
1310 // T_2 and T_3 are being assigned again because of the intra-block control 1339 // T_2 and T_3 are being assigned again because of the intra-block control
1311 // flow, so T_2 needs the _mov_redefined variant to avoid liveness 1340 // flow, so T_2 needs to use _redefined to avoid liveness problems. T_3
1312 // problems. T_3 doesn't need special treatment because it is reassigned 1341 // doesn't need special treatment because it is reassigned via _sar
1313 // via _sar instead of _mov. 1342 // instead of _mov.
1314 _mov_redefined(T_2, T_3); 1343 _redefined(_mov(T_2, T_3));
1315 _sar(T_3, SignExtend); 1344 _sar(T_3, SignExtend);
1316 } break; 1345 } break;
1317 } 1346 }
1318 // COMMON SUFFIX OF: a=b SHIFT_OP c ==> 1347 // COMMON SUFFIX OF: a=b SHIFT_OP c ==>
1319 // L1: 1348 // L1:
1320 // a.lo = t2 1349 // a.lo = t2
1321 // a.hi = t3 1350 // a.hi = t3
1322 Context.insert(Label); 1351 Context.insert(Label);
1323 _mov(DestLo, T_2); 1352 _mov(DestLo, T_2);
1324 _mov(DestHi, T_3); 1353 _mov(DestHi, T_3);
(...skipping 553 matching lines...) Expand 10 before | Expand all | Expand 10 after
1878 } 1907 }
1879 } 1908 }
1880 1909
1881 template <class Machine> 1910 template <class Machine>
1882 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1911 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1883 Variable *Dest = Inst->getDest(); 1912 Variable *Dest = Inst->getDest();
1884 if (Dest->isRematerializable()) { 1913 if (Dest->isRematerializable()) {
1885 Context.insert(InstFakeDef::create(Func, Dest)); 1914 Context.insert(InstFakeDef::create(Func, Dest));
1886 return; 1915 return;
1887 } 1916 }
1888 Operand *Src0 = Inst->getSrc(0); 1917 Operand *Src = Inst->getSrc(0);
1889 assert(Dest->getType() == Src0->getType()); 1918 assert(Dest->getType() == Src->getType());
1890 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1919 lowerMove(Dest, Src, false);
1891 Src0 = legalize(Src0);
1892 Operand *Src0Lo = loOperand(Src0);
1893 Operand *Src0Hi = hiOperand(Src0);
1894 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1895 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1896 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1897 _mov(T_Lo, Src0Lo);
1898 _mov(DestLo, T_Lo);
1899 _mov(T_Hi, Src0Hi);
1900 _mov(DestHi, T_Hi);
1901 } else {
1902 Operand *Src0Legal;
1903 if (Dest->hasReg()) {
1904 // If Dest already has a physical register, then only basic legalization
1905 // is needed, as the source operand can be a register, immediate, or
1906 // memory.
1907 Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum());
1908 } else {
1909 // If Dest could be a stack operand, then RI must be a physical register
1910 // or a scalar integer immediate.
1911 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
1912 }
1913 if (isVectorType(Dest->getType()))
1914 _movp(Dest, Src0Legal);
1915 else
1916 _mov(Dest, Src0Legal);
1917 }
1918 } 1920 }
1919 1921
1920 template <class Machine> 1922 template <class Machine>
1921 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { 1923 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) {
1922 if (Inst->isUnconditional()) { 1924 if (Br->isUnconditional()) {
1923 _br(Inst->getTargetUnconditional()); 1925 _br(Br->getTargetUnconditional());
1924 return; 1926 return;
1925 } 1927 }
1926 Operand *Cond = Inst->getCondition(); 1928 Operand *Cond = Br->getCondition();
1927 1929
1928 // Handle folding opportunities. 1930 // Handle folding opportunities.
1929 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 1931 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1930 assert(Producer->isDeleted()); 1932 assert(Producer->isDeleted());
1931 switch (BoolFolding::getProducerKind(Producer)) { 1933 switch (BoolFolding::getProducerKind(Producer)) {
1932 default: 1934 default:
1933 break; 1935 break;
1934 case BoolFolding::PK_Icmp32: 1936 case BoolFolding::PK_Icmp32:
1935 case BoolFolding::PK_Icmp64: { 1937 case BoolFolding::PK_Icmp64: {
1936 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); 1938 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Br);
1937 return; 1939 return;
1938 } 1940 }
1939 case BoolFolding::PK_Fcmp: { 1941 case BoolFolding::PK_Fcmp: {
1940 lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst); 1942 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Br);
1941 return; 1943 return;
1942 } 1944 }
1943 case BoolFolding::PK_Arith: { 1945 case BoolFolding::PK_Arith: {
1944 lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst); 1946 lowerArithAndConsumer(llvm::dyn_cast<InstArithmetic>(Producer), Br);
1945 return; 1947 return;
1946 } 1948 }
1947 } 1949 }
1948 } 1950 }
1949 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 1951 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1950 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1952 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1951 _cmp(Src0, Zero); 1953 _cmp(Src0, Zero);
1952 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1954 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
1953 } 1955 }
1954 1956
1955 template <class Machine> 1957 template <class Machine>
1956 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1958 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
1957 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1959 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1958 InstCast::OpKind CastKind = Inst->getCastKind(); 1960 InstCast::OpKind CastKind = Inst->getCastKind();
1959 Variable *Dest = Inst->getDest(); 1961 Variable *Dest = Inst->getDest();
1960 Type DestTy = Dest->getType(); 1962 Type DestTy = Dest->getType();
1961 switch (CastKind) { 1963 switch (CastKind) {
1962 default: 1964 default:
(...skipping 513 matching lines...) Expand 10 before | Expand all | Expand 10 after
2476 lowerCast(Cast); 2478 lowerCast(Cast);
2477 ExtractedElementR = T; 2479 ExtractedElementR = T;
2478 } 2480 }
2479 2481
2480 // Copy the element to the destination. 2482 // Copy the element to the destination.
2481 Variable *Dest = Inst->getDest(); 2483 Variable *Dest = Inst->getDest();
2482 _mov(Dest, ExtractedElementR); 2484 _mov(Dest, ExtractedElementR);
2483 } 2485 }
2484 2486
2485 template <class Machine> 2487 template <class Machine>
2486 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { 2488 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) {
2487 constexpr InstBr *Br = nullptr; 2489 Variable *Dest = Fcmp->getDest();
2488 lowerFcmpAndBr(Inst, Br); 2490
2491 if (isVectorType(Dest->getType())) {
2492 lowerFcmpVector(Fcmp);
2493 } else {
2494 constexpr Inst *Consumer = nullptr;
2495 lowerFcmpAndConsumer(Fcmp, Consumer);
2496 }
2489 } 2497 }
2490 2498
2491 template <class Machine> 2499 template <class Machine>
2492 void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, 2500 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
2493 const InstBr *Br) { 2501 const Inst *Consumer) {
2494 Operand *Src0 = Inst->getSrc(0); 2502 Operand *Src0 = Fcmp->getSrc(0);
2495 Operand *Src1 = Inst->getSrc(1); 2503 Operand *Src1 = Fcmp->getSrc(1);
2496 Variable *Dest = Inst->getDest(); 2504 Variable *Dest = Fcmp->getDest();
2497 2505
2498 if (isVectorType(Dest->getType())) { 2506 if (isVectorType(Dest->getType()))
2499 if (Br) 2507 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2500 llvm::report_fatal_error("vector compare/branch cannot be folded");
2501 InstFcmp::FCond Condition = Inst->getCondition();
2502 size_t Index = static_cast<size_t>(Condition);
2503 assert(Index < Traits::TableFcmpSize);
2504 2508
2505 if (Traits::TableFcmp[Index].SwapVectorOperands) 2509 if (Consumer != nullptr) {
2506 std::swap(Src0, Src1); 2510 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2507 2511 if (lowerOptimizeFcmpSelect(Fcmp, Select))
2508 Variable *T = nullptr; 2512 return;
2509
2510 if (Condition == InstFcmp::True) {
2511 // makeVectorOfOnes() requires an integer vector type.
2512 T = makeVectorOfMinusOnes(IceType_v4i32);
2513 } else if (Condition == InstFcmp::False) {
2514 T = makeVectorOfZeros(Dest->getType());
2515 } else {
2516 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2517 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2518 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2519 Src1RM = legalizeToReg(Src1RM);
2520
2521 switch (Condition) {
2522 default: {
2523 typename Traits::Cond::CmppsCond Predicate =
2524 Traits::TableFcmp[Index].Predicate;
2525 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2526 T = makeReg(Src0RM->getType());
2527 _movp(T, Src0RM);
2528 _cmpps(T, Src1RM, Predicate);
2529 } break;
2530 case InstFcmp::One: {
2531 // Check both unequal and ordered.
2532 T = makeReg(Src0RM->getType());
2533 Variable *T2 = makeReg(Src0RM->getType());
2534 _movp(T, Src0RM);
2535 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
2536 _movp(T2, Src0RM);
2537 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
2538 _pand(T, T2);
2539 } break;
2540 case InstFcmp::Ueq: {
2541 // Check both equal or unordered.
2542 T = makeReg(Src0RM->getType());
2543 Variable *T2 = makeReg(Src0RM->getType());
2544 _movp(T, Src0RM);
2545 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
2546 _movp(T2, Src0RM);
2547 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
2548 _por(T, T2);
2549 } break;
2550 }
2551 } 2513 }
2552
2553 _movp(Dest, T);
2554 eliminateNextVectorSextInstruction(Dest);
2555 return;
2556 } 2514 }
2557 2515
2558 // Lowering a = fcmp cond, b, c 2516 // Lowering a = fcmp cond, b, c
2559 // ucomiss b, c /* only if C1 != Br_None */ 2517 // ucomiss b, c /* only if C1 != Br_None */
2560 // /* but swap b,c order if SwapOperands==true */ 2518 // /* but swap b,c order if SwapOperands==true */
2561 // mov a, <default> 2519 // mov a, <default>
2562 // j<C1> label /* only if C1 != Br_None */ 2520 // j<C1> label /* only if C1 != Br_None */
2563 // j<C2> label /* only if C2 != Br_None */ 2521 // j<C2> label /* only if C2 != Br_None */
2564 // FakeUse(a) /* only if C1 != Br_None */ 2522 // FakeUse(a) /* only if C1 != Br_None */
2565 // mov a, !<default> /* only if C1 != Br_None */ 2523 // mov a, !<default> /* only if C1 != Br_None */
2566 // label: /* only if C1 != Br_None */ 2524 // label: /* only if C1 != Br_None */
2567 // 2525 //
2568 // setcc lowering when C1 != Br_None && C2 == Br_None: 2526 // setcc lowering when C1 != Br_None && C2 == Br_None:
2569 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ 2527 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
2570 // setcc a, C1 2528 // setcc a, C1
2571 InstFcmp::FCond Condition = Inst->getCondition(); 2529 InstFcmp::FCond Condition = Fcmp->getCondition();
2572 size_t Index = static_cast<size_t>(Condition); 2530 size_t Index = static_cast<size_t>(Condition);
2573 assert(Index < Traits::TableFcmpSize); 2531 assert(Index < Traits::TableFcmpSize);
2574 if (Traits::TableFcmp[Index].SwapScalarOperands) 2532 if (Traits::TableFcmp[Index].SwapScalarOperands)
2575 std::swap(Src0, Src1); 2533 std::swap(Src0, Src1);
2576 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); 2534 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2577 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); 2535 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
2578 if (HasC1) { 2536 if (HasC1) {
2579 Src0 = legalize(Src0); 2537 Src0 = legalize(Src0);
2580 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2538 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2581 Variable *T = nullptr; 2539 Variable *T = nullptr;
2582 _mov(T, Src0); 2540 _mov(T, Src0);
2583 _ucomiss(T, Src1RM); 2541 _ucomiss(T, Src1RM);
2584 if (!HasC2) { 2542 if (!HasC2) {
2585 assert(Traits::TableFcmp[Index].Default); 2543 assert(Traits::TableFcmp[Index].Default);
2586 setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br); 2544 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer);
2587 return; 2545 return;
2588 } 2546 }
2589 } 2547 }
2590 int32_t IntDefault = Traits::TableFcmp[Index].Default; 2548 int32_t IntDefault = Traits::TableFcmp[Index].Default;
2591 if (Br == nullptr) { 2549 if (Consumer == nullptr) {
2592 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); 2550 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
2593 _mov(Dest, Default); 2551 _mov(Dest, Default);
2594 if (HasC1) { 2552 if (HasC1) {
2595 typename Traits::Insts::Label *Label = 2553 typename Traits::Insts::Label *Label =
2596 Traits::Insts::Label::create(Func, this); 2554 Traits::Insts::Label::create(Func, this);
2597 _br(Traits::TableFcmp[Index].C1, Label); 2555 _br(Traits::TableFcmp[Index].C1, Label);
2598 if (HasC2) { 2556 if (HasC2) {
2599 _br(Traits::TableFcmp[Index].C2, Label); 2557 _br(Traits::TableFcmp[Index].C2, Label);
2600 } 2558 }
2601 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); 2559 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
2602 _mov_redefined(Dest, NonDefault); 2560 _redefined(_mov(Dest, NonDefault));
2603 Context.insert(Label); 2561 Context.insert(Label);
2604 } 2562 }
2605 } else { 2563 return;
2564 }
2565 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2606 CfgNode *TrueSucc = Br->getTargetTrue(); 2566 CfgNode *TrueSucc = Br->getTargetTrue();
2607 CfgNode *FalseSucc = Br->getTargetFalse(); 2567 CfgNode *FalseSucc = Br->getTargetFalse();
2608 if (IntDefault != 0) 2568 if (IntDefault != 0)
2609 std::swap(TrueSucc, FalseSucc); 2569 std::swap(TrueSucc, FalseSucc);
2610 if (HasC1) { 2570 if (HasC1) {
2611 _br(Traits::TableFcmp[Index].C1, FalseSucc); 2571 _br(Traits::TableFcmp[Index].C1, FalseSucc);
2612 if (HasC2) { 2572 if (HasC2) {
2613 _br(Traits::TableFcmp[Index].C2, FalseSucc); 2573 _br(Traits::TableFcmp[Index].C2, FalseSucc);
2614 } 2574 }
2615 _br(TrueSucc); 2575 _br(TrueSucc);
2616 return; 2576 return;
2617 } 2577 }
2618 _br(FalseSucc); 2578 _br(FalseSucc);
2579 return;
2619 } 2580 }
2581 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2582 Operand *SrcT = Select->getTrueOperand();
2583 Operand *SrcF = Select->getFalseOperand();
2584 Variable *SelectDest = Select->getDest();
2585 if (IntDefault != 0)
2586 std::swap(SrcT, SrcF);
2587 lowerMove(SelectDest, SrcF, false);
2588 if (HasC1) {
2589 typename Traits::Insts::Label *Label =
2590 Traits::Insts::Label::create(Func, this);
2591 _br(Traits::TableFcmp[Index].C1, Label);
2592 if (HasC2) {
2593 _br(Traits::TableFcmp[Index].C2, Label);
2594 }
2595 static constexpr bool IsRedefinition = true;
2596 lowerMove(SelectDest, SrcT, IsRedefinition);
2597 Context.insert(Label);
2598 }
2599 return;
2600 }
2601 llvm::report_fatal_error("Unexpected consumer type");
2602 }
2603
2604 template <class Machine>
2605 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) {
2606 Operand *Src0 = Fcmp->getSrc(0);
2607 Operand *Src1 = Fcmp->getSrc(1);
2608 Variable *Dest = Fcmp->getDest();
2609
2610 if (!isVectorType(Dest->getType()))
2611 llvm::report_fatal_error("Expected vector compare");
2612
2613 InstFcmp::FCond Condition = Fcmp->getCondition();
2614 size_t Index = static_cast<size_t>(Condition);
2615 assert(Index < Traits::TableFcmpSize);
2616
2617 if (Traits::TableFcmp[Index].SwapVectorOperands)
2618 std::swap(Src0, Src1);
2619
2620 Variable *T = nullptr;
2621
2622 if (Condition == InstFcmp::True) {
2623 // makeVectorOfOnes() requires an integer vector type.
2624 T = makeVectorOfMinusOnes(IceType_v4i32);
2625 } else if (Condition == InstFcmp::False) {
2626 T = makeVectorOfZeros(Dest->getType());
2627 } else {
2628 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2629 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2630 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2631 Src1RM = legalizeToReg(Src1RM);
2632
2633 switch (Condition) {
2634 default: {
2635 typename Traits::Cond::CmppsCond Predicate =
2636 Traits::TableFcmp[Index].Predicate;
2637 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2638 T = makeReg(Src0RM->getType());
2639 _movp(T, Src0RM);
2640 _cmpps(T, Src1RM, Predicate);
2641 } break;
2642 case InstFcmp::One: {
2643 // Check both unequal and ordered.
2644 T = makeReg(Src0RM->getType());
2645 Variable *T2 = makeReg(Src0RM->getType());
2646 _movp(T, Src0RM);
2647 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
2648 _movp(T2, Src0RM);
2649 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
2650 _pand(T, T2);
2651 } break;
2652 case InstFcmp::Ueq: {
2653 // Check both equal or unordered.
2654 T = makeReg(Src0RM->getType());
2655 Variable *T2 = makeReg(Src0RM->getType());
2656 _movp(T, Src0RM);
2657 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
2658 _movp(T2, Src0RM);
2659 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
2660 _por(T, T2);
2661 } break;
2662 }
2663 }
2664
2665 assert(T != nullptr);
2666 _movp(Dest, T);
2667 eliminateNextVectorSextInstruction(Dest);
2620 } 2668 }
2621 2669
2622 inline bool isZero(const Operand *Opnd) { 2670 inline bool isZero(const Operand *Opnd) {
2623 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) 2671 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
2624 return C64->getValue() == 0; 2672 return C64->getValue() == 0;
2625 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) 2673 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
2626 return C32->getValue() == 0; 2674 return C32->getValue() == 0;
2627 return false; 2675 return false;
2628 } 2676 }
2629 2677
2630 template <class Machine> 2678 template <class Machine>
2631 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { 2679 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
2632 constexpr InstBr *Br = nullptr; 2680 const Inst *Consumer) {
2633 lowerIcmpAndBr(Inst, Br);
2634 }
2635
2636 template <class Machine>
2637 void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp,
2638 const InstBr *Br) {
2639 Operand *Src0 = legalize(Icmp->getSrc(0)); 2681 Operand *Src0 = legalize(Icmp->getSrc(0));
2640 Operand *Src1 = legalize(Icmp->getSrc(1)); 2682 Operand *Src1 = legalize(Icmp->getSrc(1));
2641 Variable *Dest = Icmp->getDest(); 2683 Variable *Dest = Icmp->getDest();
2642 2684
2643 if (isVectorType(Dest->getType())) { 2685 if (isVectorType(Dest->getType()))
2644 if (Br) 2686 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2645 llvm::report_fatal_error("vector compare/branch cannot be folded");
2646 Type Ty = Src0->getType();
2647 // Promote i1 vectors to 128 bit integer vector types.
2648 if (typeElementType(Ty) == IceType_i1) {
2649 Type NewTy = IceType_NUM;
2650 switch (Ty) {
2651 default:
2652 llvm_unreachable("unexpected type");
2653 break;
2654 case IceType_v4i1:
2655 NewTy = IceType_v4i32;
2656 break;
2657 case IceType_v8i1:
2658 NewTy = IceType_v8i16;
2659 break;
2660 case IceType_v16i1:
2661 NewTy = IceType_v16i8;
2662 break;
2663 }
2664 Variable *NewSrc0 = Func->makeVariable(NewTy);
2665 Variable *NewSrc1 = Func->makeVariable(NewTy);
2666 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2667 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2668 Src0 = NewSrc0;
2669 Src1 = NewSrc1;
2670 Ty = NewTy;
2671 }
2672 2687
2673 InstIcmp::ICond Condition = Icmp->getCondition(); 2688 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2674 2689 lowerIcmp64(Icmp, Consumer);
2675 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2676 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2677
2678 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2679 // a manner that allows for the use of signed comparison operations by
2680 // flipping the high order bits.
2681 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2682 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2683 Variable *T0 = makeReg(Ty);
2684 Variable *T1 = makeReg(Ty);
2685 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2686 _movp(T0, Src0RM);
2687 _pxor(T0, HighOrderBits);
2688 _movp(T1, Src1RM);
2689 _pxor(T1, HighOrderBits);
2690 Src0RM = T0;
2691 Src1RM = T1;
2692 }
2693
2694 Variable *T = makeReg(Ty);
2695 switch (Condition) {
2696 default:
2697 llvm_unreachable("unexpected condition");
2698 break;
2699 case InstIcmp::Eq: {
2700 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2701 Src1RM = legalizeToReg(Src1RM);
2702 _movp(T, Src0RM);
2703 _pcmpeq(T, Src1RM);
2704 } break;
2705 case InstIcmp::Ne: {
2706 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2707 Src1RM = legalizeToReg(Src1RM);
2708 _movp(T, Src0RM);
2709 _pcmpeq(T, Src1RM);
2710 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2711 _pxor(T, MinusOne);
2712 } break;
2713 case InstIcmp::Ugt:
2714 case InstIcmp::Sgt: {
2715 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2716 Src1RM = legalizeToReg(Src1RM);
2717 _movp(T, Src0RM);
2718 _pcmpgt(T, Src1RM);
2719 } break;
2720 case InstIcmp::Uge:
2721 case InstIcmp::Sge: {
2722 // !(Src1RM > Src0RM)
2723 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2724 Src0RM = legalizeToReg(Src0RM);
2725 _movp(T, Src1RM);
2726 _pcmpgt(T, Src0RM);
2727 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2728 _pxor(T, MinusOne);
2729 } break;
2730 case InstIcmp::Ult:
2731 case InstIcmp::Slt: {
2732 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2733 Src0RM = legalizeToReg(Src0RM);
2734 _movp(T, Src1RM);
2735 _pcmpgt(T, Src0RM);
2736 } break;
2737 case InstIcmp::Ule:
2738 case InstIcmp::Sle: {
2739 // !(Src0RM > Src1RM)
2740 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2741 Src1RM = legalizeToReg(Src1RM);
2742 _movp(T, Src0RM);
2743 _pcmpgt(T, Src1RM);
2744 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2745 _pxor(T, MinusOne);
2746 } break;
2747 }
2748
2749 _movp(Dest, T);
2750 eliminateNextVectorSextInstruction(Dest);
2751 return; 2690 return;
2752 } 2691 }
2753 2692
2754 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2755 lowerIcmp64(Icmp, Br);
2756 return;
2757 }
2758
2759 // cmp b, c 2693 // cmp b, c
2760 if (isZero(Src1)) { 2694 if (isZero(Src1)) {
2761 switch (Icmp->getCondition()) { 2695 switch (Icmp->getCondition()) {
2762 default: 2696 default:
2763 break; 2697 break;
2764 case InstIcmp::Uge: 2698 case InstIcmp::Uge:
2765 movOrBr(true, Dest, Br); 2699 movOrConsumer(true, Dest, Consumer);
2766 return; 2700 return;
2767 case InstIcmp::Ult: 2701 case InstIcmp::Ult:
2768 movOrBr(false, Dest, Br); 2702 movOrConsumer(false, Dest, Consumer);
2769 return; 2703 return;
2770 } 2704 }
2771 } 2705 }
2772 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2706 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2773 _cmp(Src0RM, Src1); 2707 _cmp(Src0RM, Src1);
2774 setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br); 2708 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
2709 Consumer);
2710 }
2711
2712 template <class Machine>
2713 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) {
2714 Operand *Src0 = legalize(Icmp->getSrc(0));
2715 Operand *Src1 = legalize(Icmp->getSrc(1));
2716 Variable *Dest = Icmp->getDest();
2717
2718 if (!isVectorType(Dest->getType()))
2719 llvm::report_fatal_error("Expected a vector compare");
2720
2721 Type Ty = Src0->getType();
2722 // Promote i1 vectors to 128 bit integer vector types.
2723 if (typeElementType(Ty) == IceType_i1) {
2724 Type NewTy = IceType_NUM;
2725 switch (Ty) {
2726 default:
2727 llvm::report_fatal_error("unexpected type");
2728 break;
2729 case IceType_v4i1:
2730 NewTy = IceType_v4i32;
2731 break;
2732 case IceType_v8i1:
2733 NewTy = IceType_v8i16;
2734 break;
2735 case IceType_v16i1:
2736 NewTy = IceType_v16i8;
2737 break;
2738 }
2739 Variable *NewSrc0 = Func->makeVariable(NewTy);
2740 Variable *NewSrc1 = Func->makeVariable(NewTy);
2741 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2742 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2743 Src0 = NewSrc0;
2744 Src1 = NewSrc1;
2745 Ty = NewTy;
2746 }
2747
2748 InstIcmp::ICond Condition = Icmp->getCondition();
2749
2750 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2751 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2752
2753 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2754 // a manner that allows for the use of signed comparison operations by
2755 // flipping the high order bits.
2756 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2757 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2758 Variable *T0 = makeReg(Ty);
2759 Variable *T1 = makeReg(Ty);
2760 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2761 _movp(T0, Src0RM);
2762 _pxor(T0, HighOrderBits);
2763 _movp(T1, Src1RM);
2764 _pxor(T1, HighOrderBits);
2765 Src0RM = T0;
2766 Src1RM = T1;
2767 }
2768
2769 Variable *T = makeReg(Ty);
2770 switch (Condition) {
2771 default:
2772 llvm_unreachable("unexpected condition");
2773 break;
2774 case InstIcmp::Eq: {
2775 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2776 Src1RM = legalizeToReg(Src1RM);
2777 _movp(T, Src0RM);
2778 _pcmpeq(T, Src1RM);
2779 } break;
2780 case InstIcmp::Ne: {
2781 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2782 Src1RM = legalizeToReg(Src1RM);
2783 _movp(T, Src0RM);
2784 _pcmpeq(T, Src1RM);
2785 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2786 _pxor(T, MinusOne);
2787 } break;
2788 case InstIcmp::Ugt:
2789 case InstIcmp::Sgt: {
2790 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2791 Src1RM = legalizeToReg(Src1RM);
2792 _movp(T, Src0RM);
2793 _pcmpgt(T, Src1RM);
2794 } break;
2795 case InstIcmp::Uge:
2796 case InstIcmp::Sge: {
2797 // !(Src1RM > Src0RM)
2798 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2799 Src0RM = legalizeToReg(Src0RM);
2800 _movp(T, Src1RM);
2801 _pcmpgt(T, Src0RM);
2802 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2803 _pxor(T, MinusOne);
2804 } break;
2805 case InstIcmp::Ult:
2806 case InstIcmp::Slt: {
2807 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2808 Src0RM = legalizeToReg(Src0RM);
2809 _movp(T, Src1RM);
2810 _pcmpgt(T, Src0RM);
2811 } break;
2812 case InstIcmp::Ule:
2813 case InstIcmp::Sle: {
2814 // !(Src0RM > Src1RM)
2815 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2816 Src1RM = legalizeToReg(Src1RM);
2817 _movp(T, Src0RM);
2818 _pcmpgt(T, Src1RM);
2819 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2820 _pxor(T, MinusOne);
2821 } break;
2822 }
2823
2824 _movp(Dest, T);
2825 eliminateNextVectorSextInstruction(Dest);
2775 } 2826 }
2776 2827
2777 template <typename Machine> 2828 template <typename Machine>
2778 template <typename T> 2829 template <typename T>
2779 typename std::enable_if<!T::Is64Bit, void>::type 2830 typename std::enable_if<!T::Is64Bit, void>::type
2780 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { 2831 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,
2832 const Inst *Consumer) {
2781 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2833 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2782 Operand *Src0 = legalize(Icmp->getSrc(0)); 2834 Operand *Src0 = legalize(Icmp->getSrc(0));
2783 Operand *Src1 = legalize(Icmp->getSrc(1)); 2835 Operand *Src1 = legalize(Icmp->getSrc(1));
2784 Variable *Dest = Icmp->getDest(); 2836 Variable *Dest = Icmp->getDest();
2785 InstIcmp::ICond Condition = Icmp->getCondition(); 2837 InstIcmp::ICond Condition = Icmp->getCondition();
2786 size_t Index = static_cast<size_t>(Condition); 2838 size_t Index = static_cast<size_t>(Condition);
2787 assert(Index < Traits::TableIcmp64Size); 2839 assert(Index < Traits::TableIcmp64Size);
2788 Operand *Src0LoRM = nullptr; 2840 Operand *Src0LoRM = nullptr;
2789 Operand *Src0HiRM = nullptr; 2841 Operand *Src0HiRM = nullptr;
2790 // Legalize the portions of Src0 that are going to be needed. 2842 // Legalize the portions of Src0 that are going to be needed.
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2828 default: 2880 default:
2829 llvm_unreachable("unexpected condition"); 2881 llvm_unreachable("unexpected condition");
2830 break; 2882 break;
2831 case InstIcmp::Eq: 2883 case InstIcmp::Eq:
2832 case InstIcmp::Ule: 2884 case InstIcmp::Ule:
2833 // Mov Src0HiRM first, because it was legalized most recently, and will 2885 // Mov Src0HiRM first, because it was legalized most recently, and will
2834 // sometimes avoid a move before the OR. 2886 // sometimes avoid a move before the OR.
2835 _mov(Temp, Src0HiRM); 2887 _mov(Temp, Src0HiRM);
2836 _or(Temp, Src0LoRM); 2888 _or(Temp, Src0LoRM);
2837 Context.insert(InstFakeUse::create(Func, Temp)); 2889 Context.insert(InstFakeUse::create(Func, Temp));
2838 setccOrBr(Traits::Cond::Br_e, Dest, Br); 2890 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
2839 return; 2891 return;
2840 case InstIcmp::Ne: 2892 case InstIcmp::Ne:
2841 case InstIcmp::Ugt: 2893 case InstIcmp::Ugt:
2842 // Mov Src0HiRM first, because it was legalized most recently, and will 2894 // Mov Src0HiRM first, because it was legalized most recently, and will
2843 // sometimes avoid a move before the OR. 2895 // sometimes avoid a move before the OR.
2844 _mov(Temp, Src0HiRM); 2896 _mov(Temp, Src0HiRM);
2845 _or(Temp, Src0LoRM); 2897 _or(Temp, Src0LoRM);
2846 Context.insert(InstFakeUse::create(Func, Temp)); 2898 Context.insert(InstFakeUse::create(Func, Temp));
2847 setccOrBr(Traits::Cond::Br_ne, Dest, Br); 2899 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
2848 return; 2900 return;
2849 case InstIcmp::Uge: 2901 case InstIcmp::Uge:
2850 movOrBr(true, Dest, Br); 2902 movOrConsumer(true, Dest, Consumer);
2851 return; 2903 return;
2852 case InstIcmp::Ult: 2904 case InstIcmp::Ult:
2853 movOrBr(false, Dest, Br); 2905 movOrConsumer(false, Dest, Consumer);
2854 return; 2906 return;
2855 case InstIcmp::Sgt: 2907 case InstIcmp::Sgt:
2856 break; 2908 break;
2857 case InstIcmp::Sge: 2909 case InstIcmp::Sge:
2858 _test(Src0HiRM, SignMask); 2910 _test(Src0HiRM, SignMask);
2859 setccOrBr(Traits::Cond::Br_e, Dest, Br); 2911 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
2860 return; 2912 return;
2861 case InstIcmp::Slt: 2913 case InstIcmp::Slt:
2862 _test(Src0HiRM, SignMask); 2914 _test(Src0HiRM, SignMask);
2863 setccOrBr(Traits::Cond::Br_ne, Dest, Br); 2915 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
2864 return; 2916 return;
2865 case InstIcmp::Sle: 2917 case InstIcmp::Sle:
2866 break; 2918 break;
2867 } 2919 }
2868 } 2920 }
2869 // Handle general compares. 2921 // Handle general compares.
2870 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2922 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2871 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2923 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2872 if (Br == nullptr) { 2924 if (Consumer == nullptr) {
2873 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); 2925 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
2874 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); 2926 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
2875 typename Traits::Insts::Label *LabelFalse = 2927 typename Traits::Insts::Label *LabelFalse =
2876 Traits::Insts::Label::create(Func, this); 2928 Traits::Insts::Label::create(Func, this);
2877 typename Traits::Insts::Label *LabelTrue = 2929 typename Traits::Insts::Label *LabelTrue =
2878 Traits::Insts::Label::create(Func, this); 2930 Traits::Insts::Label::create(Func, this);
2879 _mov(Dest, One); 2931 _mov(Dest, One);
2880 _cmp(Src0HiRM, Src1HiRI); 2932 _cmp(Src0HiRM, Src1HiRI);
2881 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2933 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2882 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 2934 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2883 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2935 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2884 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 2936 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2885 _cmp(Src0LoRM, Src1LoRI); 2937 _cmp(Src0LoRM, Src1LoRI);
2886 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 2938 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2887 Context.insert(LabelFalse); 2939 Context.insert(LabelFalse);
2888 _mov_redefined(Dest, Zero); 2940 _redefined(_mov(Dest, Zero));
2889 Context.insert(LabelTrue); 2941 Context.insert(LabelTrue);
2890 } else { 2942 return;
2943 }
2944 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2891 _cmp(Src0HiRM, Src1HiRI); 2945 _cmp(Src0HiRM, Src1HiRI);
2892 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2946 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2893 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); 2947 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue());
2894 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2948 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2895 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); 2949 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse());
2896 _cmp(Src0LoRM, Src1LoRI); 2950 _cmp(Src0LoRM, Src1LoRI);
2897 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), 2951 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(),
2898 Br->getTargetFalse()); 2952 Br->getTargetFalse());
2953 return;
2899 } 2954 }
2955 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2956 Operand *SrcT = Select->getTrueOperand();
2957 Operand *SrcF = Select->getFalseOperand();
2958 Variable *SelectDest = Select->getDest();
2959 typename Traits::Insts::Label *LabelFalse =
2960 Traits::Insts::Label::create(Func, this);
2961 typename Traits::Insts::Label *LabelTrue =
2962 Traits::Insts::Label::create(Func, this);
2963 lowerMove(SelectDest, SrcT, false);
2964 _cmp(Src0HiRM, Src1HiRI);
2965 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2966 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2967 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2968 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2969 _cmp(Src0LoRM, Src1LoRI);
2970 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2971 Context.insert(LabelFalse);
2972 static constexpr bool IsRedefinition = true;
2973 lowerMove(SelectDest, SrcF, IsRedefinition);
2974 Context.insert(LabelTrue);
2975 return;
2976 }
2977 llvm::report_fatal_error("Unexpected consumer type");
2900 } 2978 }
2901 2979
2902 template <class Machine> 2980 template <class Machine>
2903 void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition, 2981 void TargetX86Base<Machine>::setccOrConsumer(
2904 Variable *Dest, const InstBr *Br) { 2982 typename Traits::Cond::BrCond Condition, Variable *Dest,
2905 if (Br == nullptr) { 2983 const Inst *Consumer) {
2984 if (Consumer == nullptr) {
2906 _setcc(Dest, Condition); 2985 _setcc(Dest, Condition);
2907 } else { 2986 return;
2987 }
2988 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2908 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); 2989 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
2990 return;
2909 } 2991 }
2992 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2993 Operand *SrcT = Select->getTrueOperand();
2994 Operand *SrcF = Select->getFalseOperand();
2995 Variable *SelectDest = Select->getDest();
2996 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
2997 return;
2998 }
2999 llvm::report_fatal_error("Unexpected consumer type");
2910 } 3000 }
2911 3001
2912 template <class Machine> 3002 template <class Machine>
2913 void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest, 3003 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest,
2914 const InstBr *Br) { 3004 const Inst *Consumer) {
2915 if (Br == nullptr) { 3005 if (Consumer == nullptr) {
2916 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 3006 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2917 } else { 3007 return;
3008 }
3009 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2918 // TODO(sehr,stichnot): This could be done with a single unconditional 3010 // TODO(sehr,stichnot): This could be done with a single unconditional
2919 // branch instruction, but subzero doesn't know how to handle the resulting 3011 // branch instruction, but subzero doesn't know how to handle the resulting
2920 // control flow graph changes now. Make it do so to eliminate mov and cmp. 3012 // control flow graph changes now. Make it do so to eliminate mov and cmp.
2921 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 3013 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2922 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); 3014 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
2923 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3015 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3016 return;
2924 } 3017 }
3018 if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3019 Operand *Src = nullptr;
3020 if (IcmpResult) {
3021 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3022 } else {
3023 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3024 }
3025 Variable *SelectDest = Select->getDest();
3026 lowerMove(SelectDest, Src, false);
3027 return;
3028 }
3029 llvm::report_fatal_error("Unexpected consumer type");
2925 } 3030 }
2926 3031
2927 template <class Machine> 3032 template <class Machine>
2928 void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, 3033 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith,
2929 const InstBr *Br) { 3034 const Inst *Consumer) {
2930 Variable *T = nullptr; 3035 Variable *T = nullptr;
2931 Operand *Src0 = legalize(Arith->getSrc(0)); 3036 Operand *Src0 = legalize(Arith->getSrc(0));
2932 Operand *Src1 = legalize(Arith->getSrc(1)); 3037 Operand *Src1 = legalize(Arith->getSrc(1));
2933 Variable *Dest = Arith->getDest(); 3038 Variable *Dest = Arith->getDest();
2934 switch (Arith->getOp()) { 3039 switch (Arith->getOp()) {
2935 default: 3040 default:
2936 llvm_unreachable("arithmetic operator not AND or OR"); 3041 llvm_unreachable("arithmetic operator not AND or OR");
2937 break; 3042 break;
2938 case InstArithmetic::And: 3043 case InstArithmetic::And:
2939 _mov(T, Src0); 3044 _mov(T, Src0);
2940 // Test cannot have an address in the second position. Since T is 3045 // Test cannot have an address in the second position. Since T is
2941 // guaranteed to be a register and Src1 could be a memory load, ensure 3046 // guaranteed to be a register and Src1 could be a memory load, ensure
2942 // that the second argument is a register. 3047 // that the second argument is a register.
2943 if (llvm::isa<Constant>(Src1)) 3048 if (llvm::isa<Constant>(Src1))
2944 _test(T, Src1); 3049 _test(T, Src1);
2945 else 3050 else
2946 _test(Src1, T); 3051 _test(Src1, T);
2947 break; 3052 break;
2948 case InstArithmetic::Or: 3053 case InstArithmetic::Or:
2949 _mov(T, Src0); 3054 _mov(T, Src0);
2950 _or(T, Src1); 3055 _or(T, Src1);
2951 break; 3056 break;
2952 } 3057 }
2953 Context.insert(InstFakeUse::create(Func, T)); 3058
2954 Context.insert(InstFakeDef::create(Func, Dest)); 3059 if (Consumer == nullptr) {
2955 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3060 llvm::report_fatal_error("Expected a consumer instruction");
3061 }
3062 if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3063 Context.insert(InstFakeUse::create(Func, T));
3064 Context.insert(InstFakeDef::create(Func, Dest));
3065 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3066 return;
3067 }
3068 llvm::report_fatal_error("Unexpected consumer type");
2956 } 3069 }
2957 3070
2958 template <class Machine> 3071 template <class Machine>
2959 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 3072 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2960 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3073 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2961 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3074 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2962 ConstantInteger32 *ElementIndex = 3075 ConstantInteger32 *ElementIndex =
2963 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3076 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2964 // Only constant indices are allowed in PNaCl IR. 3077 // Only constant indices are allowed in PNaCl IR.
2965 assert(ElementIndex); 3078 assert(ElementIndex);
(...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after
3428 case Intrinsics::Stacksave: { 3541 case Intrinsics::Stacksave: {
3429 Variable *esp = 3542 Variable *esp =
3430 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); 3543 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3431 Variable *Dest = Instr->getDest(); 3544 Variable *Dest = Instr->getDest();
3432 _mov(Dest, esp); 3545 _mov(Dest, esp);
3433 return; 3546 return;
3434 } 3547 }
3435 case Intrinsics::Stackrestore: { 3548 case Intrinsics::Stackrestore: {
3436 Variable *esp = 3549 Variable *esp =
3437 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); 3550 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3438 _mov_redefined(esp, Instr->getArg(0)); 3551 _redefined(_mov(esp, Instr->getArg(0)));
3439 return; 3552 return;
3440 } 3553 }
3441 case Intrinsics::Trap: 3554 case Intrinsics::Trap:
3442 _ud2(); 3555 _ud2();
3443 return; 3556 return;
3444 case Intrinsics::UnknownIntrinsic: 3557 case Intrinsics::UnknownIntrinsic:
3445 Func->setError("Should not be lowering UnknownIntrinsic"); 3558 Func->setError("Should not be lowering UnknownIntrinsic");
3446 return; 3559 return;
3447 } 3560 }
3448 return; 3561 return;
(...skipping 1161 matching lines...) Expand 10 before | Expand all | Expand 10 after
4610 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); 4723 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
4611 } 4724 }
4612 } 4725 }
4613 4726
4614 template <class Machine> 4727 template <class Machine>
4615 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { 4728 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4616 Func->setError("Phi found in regular instruction list"); 4729 Func->setError("Phi found in regular instruction list");
4617 } 4730 }
4618 4731
4619 template <class Machine> 4732 template <class Machine>
4620 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { 4733 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) {
4621 Variable *Dest = Inst->getDest(); 4734 Variable *Dest = Select->getDest();
4622 Type DestTy = Dest->getType();
4623 Operand *SrcT = Inst->getTrueOperand();
4624 Operand *SrcF = Inst->getFalseOperand();
4625 Operand *Condition = Inst->getCondition();
4626 4735
4627 if (isVectorType(DestTy)) { 4736 if (isVectorType(Dest->getType())) {
4628 Type SrcTy = SrcT->getType(); 4737 lowerSelectVector(Select);
4629 Variable *T = makeReg(SrcTy);
4630 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4631 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4632 if (InstructionSet >= Traits::SSE4_1) {
4633 // TODO(wala): If the condition operand is a constant, use blendps or
4634 // pblendw.
4635 //
4636 // Use blendvps or pblendvb to implement select.
4637 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4638 SrcTy == IceType_v4f32) {
4639 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4640 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4641 _movp(xmm0, ConditionRM);
4642 _psll(xmm0, Ctx->getConstantInt8(31));
4643 _movp(T, SrcFRM);
4644 _blendvps(T, SrcTRM, xmm0);
4645 _movp(Dest, T);
4646 } else {
4647 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4648 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4649 : IceType_v16i8;
4650 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
4651 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4652 _movp(T, SrcFRM);
4653 _pblendvb(T, SrcTRM, xmm0);
4654 _movp(Dest, T);
4655 }
4656 return;
4657 }
4658 // Lower select without SSE4.1:
4659 // a=d?b:c ==>
4660 // if elementtype(d) != i1:
4661 // d=sext(d);
4662 // a=(b&d)|(c&~d);
4663 Variable *T2 = makeReg(SrcTy);
4664 // Sign extend the condition operand if applicable.
4665 if (SrcTy == IceType_v4f32) {
4666 // The sext operation takes only integer arguments.
4667 Variable *T3 = Func->makeVariable(IceType_v4i32);
4668 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4669 _movp(T, T3);
4670 } else if (typeElementType(SrcTy) != IceType_i1) {
4671 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4672 } else {
4673 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4674 _movp(T, ConditionRM);
4675 }
4676 _movp(T2, T);
4677 _pand(T, SrcTRM);
4678 _pandn(T2, SrcFRM);
4679 _por(T, T2);
4680 _movp(Dest, T);
4681
4682 return; 4738 return;
4683 } 4739 }
4684 4740
4685 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; 4741 Operand *Condition = Select->getCondition();
4686 Operand *CmpOpnd0 = nullptr;
4687 Operand *CmpOpnd1 = nullptr;
4688 // Handle folding opportunities. 4742 // Handle folding opportunities.
4689 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4743 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4690 assert(Producer->isDeleted()); 4744 assert(Producer->isDeleted());
4691 switch (BoolFolding::getProducerKind(Producer)) { 4745 switch (BoolFolding::getProducerKind(Producer)) {
4692 default: 4746 default:
4693 break; 4747 break;
4694 case BoolFolding::PK_Icmp32: { 4748 case BoolFolding::PK_Icmp32:
4695 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4749 case BoolFolding::PK_Icmp64: {
4696 Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); 4750 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Select);
4697 CmpOpnd1 = legalize(Producer->getSrc(1)); 4751 return;
4698 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); 4752 }
4699 } break; 4753 case BoolFolding::PK_Fcmp: {
4754 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Select);
4755 return;
4756 }
4700 } 4757 }
4701 } 4758 }
4702 if (CmpOpnd0 == nullptr) {
4703 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4704 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4705 }
4706 assert(CmpOpnd0);
4707 assert(CmpOpnd1);
4708 4759
4709 _cmp(CmpOpnd0, CmpOpnd1); 4760 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
4761 Operand *Zero = Ctx->getConstantZero(IceType_i32);
4762 _cmp(CmpResult, Zero);
4763 Operand *SrcT = Select->getTrueOperand();
4764 Operand *SrcF = Select->getFalseOperand();
4765 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
4766 lowerSelectMove(Dest, Cond, SrcT, SrcF);
4767 }
4768
4769 template <class Machine>
4770 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest,
4771 typename Traits::Cond::BrCond Cond,
4772 Operand *SrcT, Operand *SrcF) {
4773 Type DestTy = Dest->getType();
4710 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4774 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4711 // The cmov instruction doesn't allow 8-bit or FP operands, so we need 4775 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
4712 // explicit control flow. 4776 // explicit control flow.
4713 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4777 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4714 typename Traits::Insts::Label *Label = 4778 typename Traits::Insts::Label *Label =
4715 Traits::Insts::Label::create(Func, this); 4779 Traits::Insts::Label::create(Func, this);
4716 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4780 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4717 _mov(Dest, SrcT); 4781 _mov(Dest, SrcT);
4718 _br(Cond, Label); 4782 _br(Cond, Label);
4719 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4783 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4720 _mov_redefined(Dest, SrcF); 4784 _redefined(_mov(Dest, SrcF));
4721 Context.insert(Label); 4785 Context.insert(Label);
4722 return; 4786 return;
4723 } 4787 }
4724 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4788 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4725 // But if SrcT is immediate, we might be able to do better, as the cmov 4789 // But if SrcT is immediate, we might be able to do better, as the cmov
4726 // instruction doesn't allow an immediate operand: 4790 // instruction doesn't allow an immediate operand:
4727 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4791 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4728 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4792 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4729 std::swap(SrcT, SrcF); 4793 std::swap(SrcT, SrcF);
4730 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4794 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4731 } 4795 }
4732 if (!Traits::Is64Bit && DestTy == IceType_i64) { 4796 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4733 SrcT = legalizeUndef(SrcT); 4797 SrcT = legalizeUndef(SrcT);
4734 SrcF = legalizeUndef(SrcF); 4798 SrcF = legalizeUndef(SrcF);
4735 // Set the low portion. 4799 // Set the low portion.
4736 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4800 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4737 Variable *TLo = nullptr; 4801 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
4738 Operand *SrcFLo = legalize(loOperand(SrcF));
4739 _mov(TLo, SrcFLo);
4740 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4741 _cmov(TLo, SrcTLo, Cond);
4742 _mov(DestLo, TLo);
4743 // Set the high portion. 4802 // Set the high portion.
4744 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4803 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4745 Variable *THi = nullptr; 4804 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
4746 Operand *SrcFHi = legalize(hiOperand(SrcF));
4747 _mov(THi, SrcFHi);
4748 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4749 _cmov(THi, SrcTHi, Cond);
4750 _mov(DestHi, THi);
4751 return; 4805 return;
4752 } 4806 }
4753 4807
4754 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || 4808 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4755 (Traits::Is64Bit && DestTy == IceType_i64)); 4809 (Traits::Is64Bit && DestTy == IceType_i64));
4810 lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
4811 }
4812
4813 template <class Machine>
4814 void TargetX86Base<Machine>::lowerSelectIntMove(
4815 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT,
4816 Operand *SrcF) {
4756 Variable *T = nullptr; 4817 Variable *T = nullptr;
4757 SrcF = legalize(SrcF); 4818 SrcF = legalize(SrcF);
4758 _mov(T, SrcF); 4819 _mov(T, SrcF);
4759 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4820 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4760 _cmov(T, SrcT, Cond); 4821 _cmov(T, SrcT, Cond);
4761 _mov(Dest, T); 4822 _mov(Dest, T);
4762 } 4823 }
4763 4824
4764 template <class Machine> 4825 template <class Machine>
4826 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src,
4827 bool IsRedefinition) {
4828 assert(Dest->getType() == Src->getType());
4829 assert(!Dest->isRematerializable());
4830 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
4831 Src = legalize(Src);
4832 Operand *SrcLo = loOperand(Src);
4833 Operand *SrcHi = hiOperand(Src);
4834 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4835 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4836 Variable *T_Lo = nullptr, *T_Hi = nullptr;
4837 _mov(T_Lo, SrcLo);
4838 _redefined(_mov(DestLo, T_Lo), IsRedefinition);
4839 _mov(T_Hi, SrcHi);
4840 _redefined(_mov(DestHi, T_Hi), IsRedefinition);
4841 } else {
4842 Operand *SrcLegal;
4843 if (Dest->hasReg()) {
4844 // If Dest already has a physical register, then only basic legalization
4845 // is needed, as the source operand can be a register, immediate, or
4846 // memory.
4847 SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
4848 } else {
4849 // If Dest could be a stack operand, then Src must be a physical register
4850 // or a scalar integer immediate.
4851 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
4852 }
4853 if (isVectorType(Dest->getType())) {
4854 _redefined(_movp(Dest, SrcLegal), IsRedefinition);
4855 } else {
4856 _redefined(_mov(Dest, SrcLegal), IsRedefinition);
4857 }
4858 }
4859 }
4860
4861 template <class Machine>
4862 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp,
4863 const InstSelect *Select) {
4864 Operand *CmpSrc0 = Fcmp->getSrc(0);
4865 Operand *CmpSrc1 = Fcmp->getSrc(1);
4866 Operand *SelectSrcT = Select->getTrueOperand();
4867 Operand *SelectSrcF = Select->getFalseOperand();
4868
4869 if (CmpSrc0->getType() != SelectSrcT->getType())
4870 return false;
4871
4872 // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.
4873 InstFcmp::FCond Condition = Fcmp->getCondition();
4874 switch (Condition) {
4875 default:
4876 return false;
4877 case InstFcmp::True:
4878 case InstFcmp::False:
4879 case InstFcmp::Ogt:
4880 case InstFcmp::Olt:
4881 (void)CmpSrc0;
4882 (void)CmpSrc1;
4883 (void)SelectSrcT;
4884 (void)SelectSrcF;
4885 break;
4886 }
4887 return false;
4888 }
4889
4890 template <class Machine>
4891 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) {
4892 Variable *Dest = Icmp->getDest();
4893 if (isVectorType(Dest->getType())) {
4894 lowerIcmpVector(Icmp);
4895 } else {
4896 constexpr Inst *Consumer = nullptr;
4897 lowerIcmpAndConsumer(Icmp, Consumer);
4898 }
4899 }
4900
/// Lowers a select instruction whose destination is a vector type, i.e.
/// Dest = Condition ? SrcT : SrcF.
///
/// A non-vector destination is reported as a fatal error. Two strategies are
/// used:
///  - When InstructionSet is at least SSE4.1, the select is emitted with
///    blendvps (v4i1, v4i32 and v4f32 operands) or pblendvb (the remaining
///    8- and 16-element types), with the mask held in xmm0.
///  - Otherwise the result is built from pand/pandn/por as
///    (SrcT & mask) | (SrcF & ~mask).
///
/// \param Inst the select instruction to lower.
4901 template <class Machine>
4902 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) {
4903 Variable *Dest = Inst->getDest();
4904 Type DestTy = Dest->getType();
4905 Operand *SrcT = Inst->getTrueOperand();
4906 Operand *SrcF = Inst->getFalseOperand();
4907 Operand *Condition = Inst->getCondition();
4908
// This routine handles only vector selects.
4909 if (!isVectorType(DestTy))
4910 llvm::report_fatal_error("Expected a vector select");
4911
// T is the working register for both strategies. The true and false operands
// are legalized once here, to register-or-memory form, and shared by both.
4912 Type SrcTy = SrcT->getType();
4913 Variable *T = makeReg(SrcTy);
4914 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4915 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4916 if (InstructionSet >= Traits::SSE4_1) {
4917 // TODO(wala): If the condition operand is a constant, use blendps or
4918 // pblendw.
4919 //
4920 // Use blendvps or pblendvb to implement select.
4921 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4922 SrcTy == IceType_v4f32) {
// Four-element case: copy the condition into xmm0 and shift it left by 31.
// Presumably this moves the low bit of each 32-bit lane into the top bit,
// which is where blendvps is expected to read its mask -- confirm against
// the instruction reference.
4923 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4924 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4925 _movp(xmm0, ConditionRM);
4926 _psll(xmm0, Ctx->getConstantInt8(31));
// Start from the false operand and blend in the true operand under the
// xmm0 mask.
4927 _movp(T, SrcFRM);
4928 _blendvps(T, SrcTRM, xmm0);
4929 _movp(Dest, T);
4930 } else {
4931 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
// Sign-extend the condition into xmm0 as v8i16 (for a v8i1 condition) or
// v16i8 (otherwise), then blend with pblendvb.
4932 Type SignExtTy =
4933 Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
4934 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
4935 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4936 _movp(T, SrcFRM);
4937 _pblendvb(T, SrcTRM, xmm0);
4938 _movp(Dest, T);
4939 }
4940 return;
4941 }
4942 // Lower select without SSE4.1:
4943 // a=d?b:c ==>
4944 // if elementtype(d) != i1:
4945 // d=sext(d);
4946 // a=(b&d)|(c&~d);
// Note: the code below decides whether to sign-extend from the element type
// of the select operands (SrcTy), not from the condition's own type.
4947 Variable *T2 = makeReg(SrcTy);
4948 // Sign extend the condition operand if applicable.
4949 if (SrcTy == IceType_v4f32) {
4950 // The sext operation takes only integer arguments.
// So extend into a v4i32 temporary first, then move it into T.
4951 Variable *T3 = Func->makeVariable(IceType_v4i32);
4952 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4953 _movp(T, T3);
4954 } else if (typeElementType(SrcTy) != IceType_i1) {
4955 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4956 } else {
// Operands with i1 elements: the condition is used as the mask unchanged.
4957 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4958 _movp(T, ConditionRM);
4959 }
// T holds the mask d here. Following the formula above: T2 is a copy of the
// mask, T becomes b&d, T2 becomes c&~d, and the two halves are OR-ed into T.
4960 _movp(T2, T);
4961 _pand(T, SrcTRM);
4962 _pandn(T2, SrcFRM);
4963 _por(T, T2);
4964 _movp(Dest, T);
4965
4966 return;
4967 }
4968
4969 template <class Machine>
4765 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4970 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4766 Operand *Value = Inst->getData(); 4971 Operand *Value = Inst->getData();
4767 Operand *Addr = Inst->getAddr(); 4972 Operand *Addr = Inst->getAddr();
4768 typename Traits::X86OperandMem *NewAddr = 4973 typename Traits::X86OperandMem *NewAddr =
4769 formMemoryOperand(Addr, Value->getType()); 4974 formMemoryOperand(Addr, Value->getType());
4770 doMockBoundsCheck(NewAddr); 4975 doMockBoundsCheck(NewAddr);
4771 Type Ty = NewAddr->getType(); 4976 Type Ty = NewAddr->getType();
4772 4977
4773 if (!Traits::Is64Bit && Ty == IceType_i64) { 4978 if (!Traits::Is64Bit && Ty == IceType_i64) {
4774 Value = legalizeUndef(Value); 4979 Value = legalizeUndef(Value);
(...skipping 738 matching lines...) Expand 10 before | Expand all | Expand 10 after
5513 case IceType_i8: 5718 case IceType_i8:
5514 case IceType_i16: 5719 case IceType_i16:
5515 case IceType_i32: 5720 case IceType_i32:
5516 case IceType_i64: 5721 case IceType_i64:
5517 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 5722 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
5518 _mov(Reg, Ctx->getConstantZero(Ty)); 5723 _mov(Reg, Ctx->getConstantZero(Ty));
5519 break; 5724 break;
5520 case IceType_f32: 5725 case IceType_f32:
5521 case IceType_f64: 5726 case IceType_f64:
5522 Context.insert(InstFakeDef::create(Func, Reg)); 5727 Context.insert(InstFakeDef::create(Func, Reg));
5523 // TODO(stichnot): Use xorps/xorpd instead of pxor. 5728 _xorps(Reg, Reg);
5524 _pxor(Reg, Reg);
5525 break; 5729 break;
5526 default: 5730 default:
5527 // All vector types use the same pxor instruction. 5731 // All vector types use the same pxor instruction.
5528 assert(isVectorType(Ty)); 5732 assert(isVectorType(Ty));
5529 Context.insert(InstFakeDef::create(Func, Reg)); 5733 Context.insert(InstFakeDef::create(Func, Reg));
5530 _pxor(Reg, Reg); 5734 _pxor(Reg, Reg);
5531 break; 5735 break;
5532 } 5736 }
5533 return Reg; 5737 return Reg;
5534 } 5738 }
(...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after
6206 } 6410 }
6207 // the offset is not eligible for blinding or pooling, return the original 6411 // the offset is not eligible for blinding or pooling, return the original
6208 // mem operand 6412 // mem operand
6209 return MemOperand; 6413 return MemOperand;
6210 } 6414 }
6211 6415
6212 } // end of namespace X86Internal 6416 } // end of namespace X86Internal
6213 } // end of namespace Ice 6417 } // end of namespace Ice
6214 6418
6215 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698