Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1497033002: Fuse icmp/fcmp with select (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: unittests work Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 92
93 private: 93 private:
94 BoolFolding(const BoolFolding &) = delete; 94 BoolFolding(const BoolFolding &) = delete;
95 BoolFolding &operator=(const BoolFolding &) = delete; 95 BoolFolding &operator=(const BoolFolding &) = delete;
96 96
97 public: 97 public:
98 BoolFolding() = default; 98 BoolFolding() = default;
99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
101 static bool hasComplexLowering(const Inst *Instr); 101 static bool hasComplexLowering(const Inst *Instr);
102 static bool isValidFolding(BoolFoldingProducerKind ProducerKind,
103 BoolFoldingConsumerKind ConsumerKind);
102 void init(CfgNode *Node); 104 void init(CfgNode *Node);
103 const Inst *getProducerFor(const Operand *Opnd) const; 105 const Inst *getProducerFor(const Operand *Opnd) const;
104 void dump(const Cfg *Func) const; 106 void dump(const Cfg *Func) const;
105 107
106 private: 108 private:
107 /// Returns true if Producers contains a valid entry for the given VarNum. 109 /// Returns true if Producers contains a valid entry for the given VarNum.
108 bool containsValid(SizeT VarNum) const { 110 bool containsValid(SizeT VarNum) const {
109 auto Element = Producers.find(VarNum); 111 auto Element = Producers.find(VarNum);
110 return Element != Producers.end() && Element->second.Instr != nullptr; 112 return Element != Producers.end() && Element->second.Instr != nullptr;
111 } 113 }
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
186 return false; 188 return false;
187 case PK_Icmp64: 189 case PK_Icmp64:
188 return true; 190 return true;
189 case PK_Fcmp: 191 case PK_Fcmp:
190 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
191 .C2 != MachineTraits::Cond::Br_None; 193 .C2 != MachineTraits::Cond::Br_None;
192 } 194 }
193 } 195 }
194 196
195 template <class MachineTraits> 197 template <class MachineTraits>
198 bool BoolFolding<MachineTraits>::isValidFolding(
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
201 switch (ProducerKind) {
202 default:
203 return false;
204 case PK_Icmp32:
205 case PK_Icmp64:
206 case PK_Fcmp:
207 return (ConsumerKind == CK_Br) || (ConsumerKind == CK_Select);
208 case PK_Arith:
209 return ConsumerKind == CK_Br;
210 }
211 }
212
213 template <class MachineTraits>
196 void BoolFolding<MachineTraits>::init(CfgNode *Node) { 214 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
197 Producers.clear(); 215 Producers.clear();
198 for (Inst &Instr : Node->getInsts()) { 216 for (Inst &Instr : Node->getInsts()) {
199 // Check whether Instr is a valid producer. 217 // Check whether Instr is a valid producer.
200 Variable *Var = Instr.getDest(); 218 Variable *Var = Instr.getDest();
201 if (!Instr.isDeleted() // only consider non-deleted instructions 219 if (!Instr.isDeleted() // only consider non-deleted instructions
202 && Var // only instructions with an actual dest var 220 && Var // only instructions with an actual dest var
203 && Var->getType() == IceType_i1 // only bool-type dest vars 221 && Var->getType() == IceType_i1 // only bool-type dest vars
204 && getProducerKind(&Instr) != PK_None) { // white-listed instructions 222 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
205 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr); 223 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
206 } 224 }
207 // Check each src variable against the map. 225 // Check each src variable against the map.
208 FOREACH_VAR_IN_INST(Var, Instr) { 226 FOREACH_VAR_IN_INST(Var, Instr) {
209 SizeT VarNum = Var->getIndex(); 227 SizeT VarNum = Var->getIndex();
210 if (containsValid(VarNum)) { 228 if (!containsValid(VarNum))
211 if (IndexOfVarOperandInInst(Var) != 229 continue;
212 0 // All valid consumers use Var as the first source operand 230 // All valid consumers use Var as the first source operand
213 || 231 if (IndexOfVarOperandInInst(Var) != 0) {
214 getConsumerKind(&Instr) == CK_None // must be white-listed 232 setInvalid(VarNum);
215 || 233 continue;
216 (getConsumerKind(&Instr) != CK_Br && // Icmp64 only folds in branch 234 }
217 getProducerKind(Producers[VarNum].Instr) != PK_Icmp32) || 235 // Consumer instructions must be white-listed
218 (Producers[VarNum].IsComplex && // complex can't be multi-use 236 auto ConsumerKind = getConsumerKind(&Instr);
John 2015/12/07 13:07:44 I personally like using auto **everywhere** but un
Jim Stichnoth 2015/12/08 18:55:59 I don't think auto should be used here.
sehr 2015/12/15 20:45:44 Jim shot you down :-).
sehr 2015/12/15 20:45:44 Done.
219 Producers[VarNum].NumUses > 0)) { 237 if (ConsumerKind == CK_None) {
220 setInvalid(VarNum); 238 setInvalid(VarNum);
221 continue; 239 continue;
222 } 240 }
223 ++Producers[VarNum].NumUses; 241 auto ProducerKind = getProducerKind(Producers[VarNum].Instr);
224 if (Instr.isLastUse(Var)) { 242 if (!isValidFolding(ProducerKind, ConsumerKind)) {
225 Producers[VarNum].IsLiveOut = false; 243 setInvalid(VarNum);
226 } 244 continue;
245 }
246 // Avoid creating multiple copies of complex producer instructions.
247 if (Producers[VarNum].IsComplex && Producers[VarNum].NumUses > 0) {
248 setInvalid(VarNum);
249 continue;
250 }
251 ++Producers[VarNum].NumUses;
252 if (Instr.isLastUse(Var)) {
253 Producers[VarNum].IsLiveOut = false;
227 } 254 }
228 } 255 }
229 } 256 }
230 for (auto &I : Producers) { 257 for (auto &I : Producers) {
231 // Ignore entries previously marked invalid. 258 // Ignore entries previously marked invalid.
232 if (I.second.Instr == nullptr) 259 if (I.second.Instr == nullptr)
233 continue; 260 continue;
234 // Disable the producer if its dest may be live beyond this block. 261 // Disable the producer if its dest may be live beyond this block.
235 if (I.second.IsLiveOut) { 262 if (I.second.IsLiveOut) {
236 setInvalid(I.first); 263 setInvalid(I.first);
(...skipping 1641 matching lines...) Expand 10 before | Expand all | Expand 10 after
1878 } 1905 }
1879 } 1906 }
1880 1907
1881 template <class Machine> 1908 template <class Machine>
1882 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1909 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1883 Variable *Dest = Inst->getDest(); 1910 Variable *Dest = Inst->getDest();
1884 if (Dest->isRematerializable()) { 1911 if (Dest->isRematerializable()) {
1885 Context.insert(InstFakeDef::create(Func, Dest)); 1912 Context.insert(InstFakeDef::create(Func, Dest));
1886 return; 1913 return;
1887 } 1914 }
1888 Operand *Src0 = Inst->getSrc(0); 1915 Operand *Src = Inst->getSrc(0);
1889 assert(Dest->getType() == Src0->getType()); 1916 assert(Dest->getType() == Src->getType());
1890 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1917 lowerMove(Dest, Src, false);
1891 Src0 = legalize(Src0);
1892 Operand *Src0Lo = loOperand(Src0);
1893 Operand *Src0Hi = hiOperand(Src0);
1894 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1895 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1896 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1897 _mov(T_Lo, Src0Lo);
1898 _mov(DestLo, T_Lo);
1899 _mov(T_Hi, Src0Hi);
1900 _mov(DestHi, T_Hi);
1901 } else {
1902 Operand *Src0Legal;
1903 if (Dest->hasReg()) {
1904 // If Dest already has a physical register, then only basic legalization
1905 // is needed, as the source operand can be a register, immediate, or
1906 // memory.
1907 Src0Legal = legalize(Src0, Legal_Reg, Dest->getRegNum());
1908 } else {
1909 // If Dest could be a stack operand, then RI must be a physical register
1910 // or a scalar integer immediate.
1911 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
1912 }
1913 if (isVectorType(Dest->getType()))
1914 _movp(Dest, Src0Legal);
1915 else
1916 _mov(Dest, Src0Legal);
1917 }
1918 } 1918 }
1919 1919
1920 template <class Machine> 1920 template <class Machine>
1921 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { 1921 void TargetX86Base<Machine>::lowerBr(const InstBr *Br) {
1922 if (Inst->isUnconditional()) { 1922 if (Br->isUnconditional()) {
1923 _br(Inst->getTargetUnconditional()); 1923 _br(Br->getTargetUnconditional());
1924 return; 1924 return;
1925 } 1925 }
1926 Operand *Cond = Inst->getCondition(); 1926 Operand *Cond = Br->getCondition();
1927 1927
1928 // Handle folding opportunities. 1928 // Handle folding opportunities.
1929 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 1929 if (const Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1930 assert(Producer->isDeleted()); 1930 assert(Producer->isDeleted());
1931 switch (BoolFolding::getProducerKind(Producer)) { 1931 switch (BoolFolding::getProducerKind(Producer)) {
1932 default: 1932 default:
1933 break; 1933 break;
1934 case BoolFolding::PK_Icmp32: 1934 case BoolFolding::PK_Icmp32:
1935 case BoolFolding::PK_Icmp64: { 1935 case BoolFolding::PK_Icmp64: {
1936 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); 1936 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Br);
1937 return; 1937 return;
1938 } 1938 }
1939 case BoolFolding::PK_Fcmp: { 1939 case BoolFolding::PK_Fcmp: {
1940 lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst); 1940 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Br);
1941 return; 1941 return;
1942 } 1942 }
1943 case BoolFolding::PK_Arith: { 1943 case BoolFolding::PK_Arith: {
1944 lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst); 1944 lowerArithAndConsumer(llvm::dyn_cast<InstArithmetic>(Producer), Br);
1945 return; 1945 return;
1946 } 1946 }
1947 } 1947 }
1948 } 1948 }
1949 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 1949 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1950 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1950 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1951 _cmp(Src0, Zero); 1951 _cmp(Src0, Zero);
1952 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1952 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
1953 } 1953 }
1954 1954
1955 template <class Machine> 1955 template <class Machine>
1956 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1956 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
1957 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1957 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1958 InstCast::OpKind CastKind = Inst->getCastKind(); 1958 InstCast::OpKind CastKind = Inst->getCastKind();
1959 Variable *Dest = Inst->getDest(); 1959 Variable *Dest = Inst->getDest();
1960 Type DestTy = Dest->getType(); 1960 Type DestTy = Dest->getType();
1961 switch (CastKind) { 1961 switch (CastKind) {
1962 default: 1962 default:
(...skipping 513 matching lines...) Expand 10 before | Expand all | Expand 10 after
2476 lowerCast(Cast); 2476 lowerCast(Cast);
2477 ExtractedElementR = T; 2477 ExtractedElementR = T;
2478 } 2478 }
2479 2479
2480 // Copy the element to the destination. 2480 // Copy the element to the destination.
2481 Variable *Dest = Inst->getDest(); 2481 Variable *Dest = Inst->getDest();
2482 _mov(Dest, ExtractedElementR); 2482 _mov(Dest, ExtractedElementR);
2483 } 2483 }
2484 2484
2485 template <class Machine> 2485 template <class Machine>
2486 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { 2486 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Fcmp) {
2487 constexpr InstBr *Br = nullptr; 2487 Variable *Dest = Fcmp->getDest();
2488 lowerFcmpAndBr(Inst, Br); 2488
2489 if (isVectorType(Dest->getType())) {
2490 lowerFcmpVector(Fcmp);
2491 } else {
2492 constexpr Inst *Consumer = nullptr;
2493 lowerFcmpAndConsumer(Fcmp, Consumer);
2494 }
2489 } 2495 }
2490 2496
2491 template <class Machine> 2497 template <class Machine>
2492 void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, 2498 void TargetX86Base<Machine>::lowerFcmpAndConsumer(const InstFcmp *Fcmp,
2493 const InstBr *Br) { 2499 const Inst *Consumer) {
2494 Operand *Src0 = Inst->getSrc(0); 2500 Operand *Src0 = Fcmp->getSrc(0);
2495 Operand *Src1 = Inst->getSrc(1); 2501 Operand *Src1 = Fcmp->getSrc(1);
2496 Variable *Dest = Inst->getDest(); 2502 Variable *Dest = Fcmp->getDest();
2497 2503
2498 if (isVectorType(Dest->getType())) { 2504 if (isVectorType(Dest->getType()))
2499 if (Br) 2505 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2500 llvm::report_fatal_error("vector compare/branch cannot be folded");
2501 InstFcmp::FCond Condition = Inst->getCondition();
2502 size_t Index = static_cast<size_t>(Condition);
2503 assert(Index < Traits::TableFcmpSize);
2504 2506
2505 if (Traits::TableFcmp[Index].SwapVectorOperands) 2507 if (Consumer != nullptr) {
John 2015/12/07 13:07:44 Consider moving this block down to where InstSelec
sehr 2015/12/15 20:45:44 It's up here as a short-circuit before any lowerin
2506 std::swap(Src0, Src1); 2508 if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2507 2509 if (lowerOptimizeFcmpSelect(Fcmp, Select))
2508 Variable *T = nullptr; 2510 return;
2509
2510 if (Condition == InstFcmp::True) {
2511 // makeVectorOfOnes() requires an integer vector type.
2512 T = makeVectorOfMinusOnes(IceType_v4i32);
2513 } else if (Condition == InstFcmp::False) {
2514 T = makeVectorOfZeros(Dest->getType());
2515 } else {
2516 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2517 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2518 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2519 Src1RM = legalizeToReg(Src1RM);
2520
2521 switch (Condition) {
2522 default: {
2523 typename Traits::Cond::CmppsCond Predicate =
2524 Traits::TableFcmp[Index].Predicate;
2525 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2526 T = makeReg(Src0RM->getType());
2527 _movp(T, Src0RM);
2528 _cmpps(T, Src1RM, Predicate);
2529 } break;
2530 case InstFcmp::One: {
2531 // Check both unequal and ordered.
2532 T = makeReg(Src0RM->getType());
2533 Variable *T2 = makeReg(Src0RM->getType());
2534 _movp(T, Src0RM);
2535 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
2536 _movp(T2, Src0RM);
2537 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
2538 _pand(T, T2);
2539 } break;
2540 case InstFcmp::Ueq: {
2541 // Check both equal or unordered.
2542 T = makeReg(Src0RM->getType());
2543 Variable *T2 = makeReg(Src0RM->getType());
2544 _movp(T, Src0RM);
2545 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
2546 _movp(T2, Src0RM);
2547 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
2548 _por(T, T2);
2549 } break;
2550 }
2551 } 2511 }
2552
2553 _movp(Dest, T);
2554 eliminateNextVectorSextInstruction(Dest);
2555 return;
2556 } 2512 }
2557 2513
2558 // Lowering a = fcmp cond, b, c 2514 // Lowering a = fcmp cond, b, c
2559 // ucomiss b, c /* only if C1 != Br_None */ 2515 // ucomiss b, c /* only if C1 != Br_None */
2560 // /* but swap b,c order if SwapOperands==true */ 2516 // /* but swap b,c order if SwapOperands==true */
2561 // mov a, <default> 2517 // mov a, <default>
2562 // j<C1> label /* only if C1 != Br_None */ 2518 // j<C1> label /* only if C1 != Br_None */
2563 // j<C2> label /* only if C2 != Br_None */ 2519 // j<C2> label /* only if C2 != Br_None */
2564 // FakeUse(a) /* only if C1 != Br_None */ 2520 // FakeUse(a) /* only if C1 != Br_None */
2565 // mov a, !<default> /* only if C1 != Br_None */ 2521 // mov a, !<default> /* only if C1 != Br_None */
2566 // label: /* only if C1 != Br_None */ 2522 // label: /* only if C1 != Br_None */
2567 // 2523 //
2568 // setcc lowering when C1 != Br_None && C2 == Br_None: 2524 // setcc lowering when C1 != Br_None && C2 == Br_None:
2569 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ 2525 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
2570 // setcc a, C1 2526 // setcc a, C1
2571 InstFcmp::FCond Condition = Inst->getCondition(); 2527 InstFcmp::FCond Condition = Fcmp->getCondition();
2572 size_t Index = static_cast<size_t>(Condition); 2528 size_t Index = static_cast<size_t>(Condition);
2573 assert(Index < Traits::TableFcmpSize); 2529 assert(Index < Traits::TableFcmpSize);
2574 if (Traits::TableFcmp[Index].SwapScalarOperands) 2530 if (Traits::TableFcmp[Index].SwapScalarOperands)
2575 std::swap(Src0, Src1); 2531 std::swap(Src0, Src1);
2576 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); 2532 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2577 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); 2533 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
2578 if (HasC1) { 2534 if (HasC1) {
2579 Src0 = legalize(Src0); 2535 Src0 = legalize(Src0);
2580 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2536 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2581 Variable *T = nullptr; 2537 Variable *T = nullptr;
2582 _mov(T, Src0); 2538 _mov(T, Src0);
2583 _ucomiss(T, Src1RM); 2539 _ucomiss(T, Src1RM);
2584 if (!HasC2) { 2540 if (!HasC2) {
2585 assert(Traits::TableFcmp[Index].Default); 2541 assert(Traits::TableFcmp[Index].Default);
2586 setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br); 2542 setccOrConsumer(Traits::TableFcmp[Index].C1, Dest, Consumer);
2587 return; 2543 return;
2588 } 2544 }
2589 } 2545 }
2590 int32_t IntDefault = Traits::TableFcmp[Index].Default; 2546 int32_t IntDefault = Traits::TableFcmp[Index].Default;
2591 if (Br == nullptr) { 2547 if (Consumer == nullptr) {
2592 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); 2548 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
2593 _mov(Dest, Default); 2549 _mov(Dest, Default);
2594 if (HasC1) { 2550 if (HasC1) {
2595 typename Traits::Insts::Label *Label = 2551 typename Traits::Insts::Label *Label =
2596 Traits::Insts::Label::create(Func, this); 2552 Traits::Insts::Label::create(Func, this);
2597 _br(Traits::TableFcmp[Index].C1, Label); 2553 _br(Traits::TableFcmp[Index].C1, Label);
2598 if (HasC2) { 2554 if (HasC2) {
2599 _br(Traits::TableFcmp[Index].C2, Label); 2555 _br(Traits::TableFcmp[Index].C2, Label);
2600 } 2556 }
2601 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); 2557 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
2602 _mov_redefined(Dest, NonDefault); 2558 _mov_redefined(Dest, NonDefault);
2603 Context.insert(Label); 2559 Context.insert(Label);
2604 } 2560 }
Jim Stichnoth 2015/12/08 18:55:59 May be an opportunity to put an early return in ea
sehr 2015/12/15 20:45:44 Done, here and everywhere.
2605 } else { 2561 } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2606 CfgNode *TrueSucc = Br->getTargetTrue(); 2562 CfgNode *TrueSucc = Br->getTargetTrue();
2607 CfgNode *FalseSucc = Br->getTargetFalse(); 2563 CfgNode *FalseSucc = Br->getTargetFalse();
2608 if (IntDefault != 0) 2564 if (IntDefault != 0)
2609 std::swap(TrueSucc, FalseSucc); 2565 std::swap(TrueSucc, FalseSucc);
2610 if (HasC1) { 2566 if (HasC1) {
2611 _br(Traits::TableFcmp[Index].C1, FalseSucc); 2567 _br(Traits::TableFcmp[Index].C1, FalseSucc);
2612 if (HasC2) { 2568 if (HasC2) {
2613 _br(Traits::TableFcmp[Index].C2, FalseSucc); 2569 _br(Traits::TableFcmp[Index].C2, FalseSucc);
2614 } 2570 }
2615 _br(TrueSucc); 2571 _br(TrueSucc);
2616 return; 2572 return;
2617 } 2573 }
2618 _br(FalseSucc); 2574 _br(FalseSucc);
2575 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2576 Operand *SrcT = Select->getTrueOperand();
2577 Operand *SrcF = Select->getFalseOperand();
2578 Variable *SelectDest = Select->getDest();
2579 if (IntDefault != 0)
2580 std::swap(SrcT, SrcF);
2581 lowerMove(SelectDest, SrcF, false);
2582 if (HasC1) {
2583 typename Traits::Insts::Label *Label =
2584 Traits::Insts::Label::create(Func, this);
2585 _br(Traits::TableFcmp[Index].C1, Label);
2586 if (HasC2) {
2587 _br(Traits::TableFcmp[Index].C2, Label);
2588 }
2589 static constexpr bool IsRedefinition = true;
2590 lowerMove(SelectDest, SrcT, IsRedefinition);
2591 Context.insert(Label);
2592 }
2593 } else {
2594 llvm::report_fatal_error("Unexpected consumer type");
2619 } 2595 }
2620 } 2596 }
2621 2597
2598 template <class Machine>
2599 void TargetX86Base<Machine>::lowerFcmpVector(const InstFcmp *Fcmp) {
2600 Operand *Src0 = Fcmp->getSrc(0);
2601 Operand *Src1 = Fcmp->getSrc(1);
2602 Variable *Dest = Fcmp->getDest();
2603
2604 if (!isVectorType(Dest->getType()))
2605 llvm::report_fatal_error("Expected vector compare");
2606
2607 InstFcmp::FCond Condition = Fcmp->getCondition();
2608 size_t Index = static_cast<size_t>(Condition);
2609 assert(Index < Traits::TableFcmpSize);
2610
2611 if (Traits::TableFcmp[Index].SwapVectorOperands)
2612 std::swap(Src0, Src1);
2613
2614 Variable *T = nullptr;
2615
2616 if (Condition == InstFcmp::True) {
2617 // makeVectorOfOnes() requires an integer vector type.
2618 T = makeVectorOfMinusOnes(IceType_v4i32);
2619 } else if (Condition == InstFcmp::False) {
2620 T = makeVectorOfZeros(Dest->getType());
2621 } else {
2622 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2623 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2624 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2625 Src1RM = legalizeToReg(Src1RM);
2626
2627 switch (Condition) {
2628 default: {
2629 typename Traits::Cond::CmppsCond Predicate =
2630 Traits::TableFcmp[Index].Predicate;
2631 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2632 T = makeReg(Src0RM->getType());
2633 _movp(T, Src0RM);
2634 _cmpps(T, Src1RM, Predicate);
2635 } break;
2636 case InstFcmp::One: {
2637 // Check both unequal and ordered.
2638 T = makeReg(Src0RM->getType());
2639 Variable *T2 = makeReg(Src0RM->getType());
2640 _movp(T, Src0RM);
2641 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
2642 _movp(T2, Src0RM);
2643 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
2644 _pand(T, T2);
2645 } break;
2646 case InstFcmp::Ueq: {
2647 // Check both equal or unordered.
2648 T = makeReg(Src0RM->getType());
2649 Variable *T2 = makeReg(Src0RM->getType());
2650 _movp(T, Src0RM);
2651 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
2652 _movp(T2, Src0RM);
2653 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
2654 _por(T, T2);
2655 } break;
2656 }
2657 }
2658
John 2015/12/07 13:07:44 assert(T != nullptr), maybe?
sehr 2015/12/15 20:45:44 Agreed. Done.
2659 _movp(Dest, T);
2660 eliminateNextVectorSextInstruction(Dest);
2661 }
2662
2622 inline bool isZero(const Operand *Opnd) { 2663 inline bool isZero(const Operand *Opnd) {
2623 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) 2664 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
2624 return C64->getValue() == 0; 2665 return C64->getValue() == 0;
2625 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) 2666 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
2626 return C32->getValue() == 0; 2667 return C32->getValue() == 0;
2627 return false; 2668 return false;
2628 } 2669 }
2629 2670
2630 template <class Machine> 2671 template <class Machine>
2631 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { 2672 void TargetX86Base<Machine>::lowerIcmpAndConsumer(const InstIcmp *Icmp,
2632 constexpr InstBr *Br = nullptr; 2673 const Inst *Consumer) {
2633 lowerIcmpAndBr(Inst, Br);
2634 }
2635
2636 template <class Machine>
2637 void TargetX86Base<Machine>::lowerIcmpAndBr(const InstIcmp *Icmp,
2638 const InstBr *Br) {
2639 Operand *Src0 = legalize(Icmp->getSrc(0)); 2674 Operand *Src0 = legalize(Icmp->getSrc(0));
2640 Operand *Src1 = legalize(Icmp->getSrc(1)); 2675 Operand *Src1 = legalize(Icmp->getSrc(1));
2641 Variable *Dest = Icmp->getDest(); 2676 Variable *Dest = Icmp->getDest();
2642 2677
2643 if (isVectorType(Dest->getType())) { 2678 if (isVectorType(Dest->getType()))
2644 if (Br) 2679 llvm::report_fatal_error("Vector compare/branch cannot be folded");
2645 llvm::report_fatal_error("vector compare/branch cannot be folded");
2646 Type Ty = Src0->getType();
2647 // Promote i1 vectors to 128 bit integer vector types.
2648 if (typeElementType(Ty) == IceType_i1) {
2649 Type NewTy = IceType_NUM;
2650 switch (Ty) {
2651 default:
2652 llvm_unreachable("unexpected type");
2653 break;
2654 case IceType_v4i1:
2655 NewTy = IceType_v4i32;
2656 break;
2657 case IceType_v8i1:
2658 NewTy = IceType_v8i16;
2659 break;
2660 case IceType_v16i1:
2661 NewTy = IceType_v16i8;
2662 break;
2663 }
2664 Variable *NewSrc0 = Func->makeVariable(NewTy);
2665 Variable *NewSrc1 = Func->makeVariable(NewTy);
2666 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2667 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2668 Src0 = NewSrc0;
2669 Src1 = NewSrc1;
2670 Ty = NewTy;
2671 }
2672 2680
2673 InstIcmp::ICond Condition = Icmp->getCondition(); 2681 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2674 2682 lowerIcmp64(Icmp, Consumer);
2675 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2676 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2677
2678 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2679 // a manner that allows for the use of signed comparison operations by
2680 // flipping the high order bits.
2681 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2682 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2683 Variable *T0 = makeReg(Ty);
2684 Variable *T1 = makeReg(Ty);
2685 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2686 _movp(T0, Src0RM);
2687 _pxor(T0, HighOrderBits);
2688 _movp(T1, Src1RM);
2689 _pxor(T1, HighOrderBits);
2690 Src0RM = T0;
2691 Src1RM = T1;
2692 }
2693
2694 Variable *T = makeReg(Ty);
2695 switch (Condition) {
2696 default:
2697 llvm_unreachable("unexpected condition");
2698 break;
2699 case InstIcmp::Eq: {
2700 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2701 Src1RM = legalizeToReg(Src1RM);
2702 _movp(T, Src0RM);
2703 _pcmpeq(T, Src1RM);
2704 } break;
2705 case InstIcmp::Ne: {
2706 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2707 Src1RM = legalizeToReg(Src1RM);
2708 _movp(T, Src0RM);
2709 _pcmpeq(T, Src1RM);
2710 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2711 _pxor(T, MinusOne);
2712 } break;
2713 case InstIcmp::Ugt:
2714 case InstIcmp::Sgt: {
2715 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2716 Src1RM = legalizeToReg(Src1RM);
2717 _movp(T, Src0RM);
2718 _pcmpgt(T, Src1RM);
2719 } break;
2720 case InstIcmp::Uge:
2721 case InstIcmp::Sge: {
2722 // !(Src1RM > Src0RM)
2723 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2724 Src0RM = legalizeToReg(Src0RM);
2725 _movp(T, Src1RM);
2726 _pcmpgt(T, Src0RM);
2727 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2728 _pxor(T, MinusOne);
2729 } break;
2730 case InstIcmp::Ult:
2731 case InstIcmp::Slt: {
2732 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2733 Src0RM = legalizeToReg(Src0RM);
2734 _movp(T, Src1RM);
2735 _pcmpgt(T, Src0RM);
2736 } break;
2737 case InstIcmp::Ule:
2738 case InstIcmp::Sle: {
2739 // !(Src0RM > Src1RM)
2740 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2741 Src1RM = legalizeToReg(Src1RM);
2742 _movp(T, Src0RM);
2743 _pcmpgt(T, Src1RM);
2744 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2745 _pxor(T, MinusOne);
2746 } break;
2747 }
2748
2749 _movp(Dest, T);
2750 eliminateNextVectorSextInstruction(Dest);
2751 return; 2683 return;
2752 } 2684 }
2753 2685
2754 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2755 lowerIcmp64(Icmp, Br);
2756 return;
2757 }
2758
2759 // cmp b, c 2686 // cmp b, c
2760 if (isZero(Src1)) { 2687 if (isZero(Src1)) {
2761 switch (Icmp->getCondition()) { 2688 switch (Icmp->getCondition()) {
2762 default: 2689 default:
2763 break; 2690 break;
2764 case InstIcmp::Uge: 2691 case InstIcmp::Uge:
2765 movOrBr(true, Dest, Br); 2692 movOrConsumer(true, Dest, Consumer);
2766 return; 2693 return;
2767 case InstIcmp::Ult: 2694 case InstIcmp::Ult:
2768 movOrBr(false, Dest, Br); 2695 movOrConsumer(false, Dest, Consumer);
2769 return; 2696 return;
2770 } 2697 }
2771 } 2698 }
2772 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2699 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2773 _cmp(Src0RM, Src1); 2700 _cmp(Src0RM, Src1);
2774 setccOrBr(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest, Br); 2701 setccOrConsumer(Traits::getIcmp32Mapping(Icmp->getCondition()), Dest,
2702 Consumer);
2703 }
2704
2705 template <class Machine>
2706 void TargetX86Base<Machine>::lowerIcmpVector(const InstIcmp *Icmp) {
2707 Operand *Src0 = legalize(Icmp->getSrc(0));
2708 Operand *Src1 = legalize(Icmp->getSrc(1));
2709 Variable *Dest = Icmp->getDest();
2710
2711 if (!isVectorType(Dest->getType()))
2712 llvm::report_fatal_error("Expected a vector compare");
2713
2714 Type Ty = Src0->getType();
2715 // Promote i1 vectors to 128 bit integer vector types.
2716 if (typeElementType(Ty) == IceType_i1) {
John 2015/12/07 13:07:44 optional: maybe create a helper function for conve
sehr 2015/12/15 20:45:44 I think there's only one use (for now), but intend
2717 Type NewTy = IceType_NUM;
2718 switch (Ty) {
2719 default:
2720 llvm_unreachable("unexpected type");
Jim Stichnoth 2015/12/08 18:55:59 I think report_fatal_error would be better here.
sehr 2015/12/15 20:45:44 Done.
2721 break;
2722 case IceType_v4i1:
2723 NewTy = IceType_v4i32;
2724 break;
2725 case IceType_v8i1:
2726 NewTy = IceType_v8i16;
2727 break;
2728 case IceType_v16i1:
2729 NewTy = IceType_v16i8;
2730 break;
2731 }
2732 Variable *NewSrc0 = Func->makeVariable(NewTy);
2733 Variable *NewSrc1 = Func->makeVariable(NewTy);
2734 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
2735 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
2736 Src0 = NewSrc0;
2737 Src1 = NewSrc1;
2738 Ty = NewTy;
2739 }
2740
2741 InstIcmp::ICond Condition = Icmp->getCondition();
2742
2743 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2744 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2745
2746 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2747 // a manner that allows for the use of signed comparison operations by
2748 // flipping the high order bits.
2749 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2750 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2751 Variable *T0 = makeReg(Ty);
2752 Variable *T1 = makeReg(Ty);
2753 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2754 _movp(T0, Src0RM);
2755 _pxor(T0, HighOrderBits);
2756 _movp(T1, Src1RM);
2757 _pxor(T1, HighOrderBits);
2758 Src0RM = T0;
2759 Src1RM = T1;
2760 }
2761
2762 Variable *T = makeReg(Ty);
2763 switch (Condition) {
2764 default:
2765 llvm_unreachable("unexpected condition");
2766 break;
2767 case InstIcmp::Eq: {
2768 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2769 Src1RM = legalizeToReg(Src1RM);
2770 _movp(T, Src0RM);
2771 _pcmpeq(T, Src1RM);
2772 } break;
2773 case InstIcmp::Ne: {
2774 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2775 Src1RM = legalizeToReg(Src1RM);
2776 _movp(T, Src0RM);
2777 _pcmpeq(T, Src1RM);
2778 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2779 _pxor(T, MinusOne);
2780 } break;
2781 case InstIcmp::Ugt:
2782 case InstIcmp::Sgt: {
2783 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2784 Src1RM = legalizeToReg(Src1RM);
2785 _movp(T, Src0RM);
2786 _pcmpgt(T, Src1RM);
2787 } break;
2788 case InstIcmp::Uge:
2789 case InstIcmp::Sge: {
2790 // !(Src1RM > Src0RM)
2791 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2792 Src0RM = legalizeToReg(Src0RM);
2793 _movp(T, Src1RM);
2794 _pcmpgt(T, Src0RM);
2795 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2796 _pxor(T, MinusOne);
2797 } break;
2798 case InstIcmp::Ult:
2799 case InstIcmp::Slt: {
2800 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2801 Src0RM = legalizeToReg(Src0RM);
2802 _movp(T, Src1RM);
2803 _pcmpgt(T, Src0RM);
2804 } break;
2805 case InstIcmp::Ule:
2806 case InstIcmp::Sle: {
2807 // !(Src0RM > Src1RM)
2808 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2809 Src1RM = legalizeToReg(Src1RM);
2810 _movp(T, Src0RM);
2811 _pcmpgt(T, Src1RM);
2812 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2813 _pxor(T, MinusOne);
2814 } break;
2815 }
2816
2817 _movp(Dest, T);
2818 eliminateNextVectorSextInstruction(Dest);
2775 } 2819 }
2776 2820
2777 template <typename Machine> 2821 template <typename Machine>
2778 template <typename T> 2822 template <typename T>
2779 typename std::enable_if<!T::Is64Bit, void>::type 2823 typename std::enable_if<!T::Is64Bit, void>::type
2780 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, const InstBr *Br) { 2824 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,
2825 const Inst *Consumer) {
2781 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2826 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2782 Operand *Src0 = legalize(Icmp->getSrc(0)); 2827 Operand *Src0 = legalize(Icmp->getSrc(0));
2783 Operand *Src1 = legalize(Icmp->getSrc(1)); 2828 Operand *Src1 = legalize(Icmp->getSrc(1));
2784 Variable *Dest = Icmp->getDest(); 2829 Variable *Dest = Icmp->getDest();
2785 InstIcmp::ICond Condition = Icmp->getCondition(); 2830 InstIcmp::ICond Condition = Icmp->getCondition();
2786 size_t Index = static_cast<size_t>(Condition); 2831 size_t Index = static_cast<size_t>(Condition);
2787 assert(Index < Traits::TableIcmp64Size); 2832 assert(Index < Traits::TableIcmp64Size);
2788 Operand *Src0LoRM = nullptr; 2833 Operand *Src0LoRM = nullptr;
2789 Operand *Src0HiRM = nullptr; 2834 Operand *Src0HiRM = nullptr;
2790 // Legalize the portions of Src0 that are going to be needed. 2835 // Legalize the portions of Src0 that are going to be needed.
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
2828 default: 2873 default:
2829 llvm_unreachable("unexpected condition"); 2874 llvm_unreachable("unexpected condition");
2830 break; 2875 break;
2831 case InstIcmp::Eq: 2876 case InstIcmp::Eq:
2832 case InstIcmp::Ule: 2877 case InstIcmp::Ule:
2833 // Mov Src0HiRM first, because it was legalized most recently, and will 2878 // Mov Src0HiRM first, because it was legalized most recently, and will
2834 // sometimes avoid a move before the OR. 2879 // sometimes avoid a move before the OR.
2835 _mov(Temp, Src0HiRM); 2880 _mov(Temp, Src0HiRM);
2836 _or(Temp, Src0LoRM); 2881 _or(Temp, Src0LoRM);
2837 Context.insert(InstFakeUse::create(Func, Temp)); 2882 Context.insert(InstFakeUse::create(Func, Temp));
2838 setccOrBr(Traits::Cond::Br_e, Dest, Br); 2883 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
2839 return; 2884 return;
2840 case InstIcmp::Ne: 2885 case InstIcmp::Ne:
2841 case InstIcmp::Ugt: 2886 case InstIcmp::Ugt:
2842 // Mov Src0HiRM first, because it was legalized most recently, and will 2887 // Mov Src0HiRM first, because it was legalized most recently, and will
2843 // sometimes avoid a move before the OR. 2888 // sometimes avoid a move before the OR.
2844 _mov(Temp, Src0HiRM); 2889 _mov(Temp, Src0HiRM);
2845 _or(Temp, Src0LoRM); 2890 _or(Temp, Src0LoRM);
2846 Context.insert(InstFakeUse::create(Func, Temp)); 2891 Context.insert(InstFakeUse::create(Func, Temp));
2847 setccOrBr(Traits::Cond::Br_ne, Dest, Br); 2892 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
2848 return; 2893 return;
2849 case InstIcmp::Uge: 2894 case InstIcmp::Uge:
2850 movOrBr(true, Dest, Br); 2895 movOrConsumer(true, Dest, Consumer);
2851 return; 2896 return;
2852 case InstIcmp::Ult: 2897 case InstIcmp::Ult:
2853 movOrBr(false, Dest, Br); 2898 movOrConsumer(false, Dest, Consumer);
2854 return; 2899 return;
2855 case InstIcmp::Sgt: 2900 case InstIcmp::Sgt:
2856 break; 2901 break;
2857 case InstIcmp::Sge: 2902 case InstIcmp::Sge:
2858 _test(Src0HiRM, SignMask); 2903 _test(Src0HiRM, SignMask);
2859 setccOrBr(Traits::Cond::Br_e, Dest, Br); 2904 setccOrConsumer(Traits::Cond::Br_e, Dest, Consumer);
2860 return; 2905 return;
2861 case InstIcmp::Slt: 2906 case InstIcmp::Slt:
2862 _test(Src0HiRM, SignMask); 2907 _test(Src0HiRM, SignMask);
2863 setccOrBr(Traits::Cond::Br_ne, Dest, Br); 2908 setccOrConsumer(Traits::Cond::Br_ne, Dest, Consumer);
2864 return; 2909 return;
2865 case InstIcmp::Sle: 2910 case InstIcmp::Sle:
2866 break; 2911 break;
2867 } 2912 }
2868 } 2913 }
2869 // Handle general compares. 2914 // Handle general compares.
2870 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2915 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2871 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2916 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2872 if (Br == nullptr) { 2917 if (Consumer == nullptr) {
2873 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0); 2918 Constant *Zero = Ctx->getConstantInt(Dest->getType(), 0);
2874 Constant *One = Ctx->getConstantInt(Dest->getType(), 1); 2919 Constant *One = Ctx->getConstantInt(Dest->getType(), 1);
2875 typename Traits::Insts::Label *LabelFalse = 2920 typename Traits::Insts::Label *LabelFalse =
2876 Traits::Insts::Label::create(Func, this); 2921 Traits::Insts::Label::create(Func, this);
2877 typename Traits::Insts::Label *LabelTrue = 2922 typename Traits::Insts::Label *LabelTrue =
2878 Traits::Insts::Label::create(Func, this); 2923 Traits::Insts::Label::create(Func, this);
2879 _mov(Dest, One); 2924 _mov(Dest, One);
2880 _cmp(Src0HiRM, Src1HiRI); 2925 _cmp(Src0HiRM, Src1HiRI);
2881 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2926 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2882 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 2927 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2883 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2928 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2884 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 2929 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2885 _cmp(Src0LoRM, Src1LoRI); 2930 _cmp(Src0LoRM, Src1LoRI);
2886 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 2931 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2887 Context.insert(LabelFalse); 2932 Context.insert(LabelFalse);
2888 _mov_redefined(Dest, Zero); 2933 _mov_redefined(Dest, Zero);
2889 Context.insert(LabelTrue); 2934 Context.insert(LabelTrue);
2890 } else { 2935 } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2891 _cmp(Src0HiRM, Src1HiRI); 2936 _cmp(Src0HiRM, Src1HiRI);
2892 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 2937 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2893 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue()); 2938 _br(Traits::TableIcmp64[Index].C1, Br->getTargetTrue());
2894 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 2939 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2895 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse()); 2940 _br(Traits::TableIcmp64[Index].C2, Br->getTargetFalse());
2896 _cmp(Src0LoRM, Src1LoRI); 2941 _cmp(Src0LoRM, Src1LoRI);
2897 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(), 2942 _br(Traits::TableIcmp64[Index].C3, Br->getTargetTrue(),
2898 Br->getTargetFalse()); 2943 Br->getTargetFalse());
2944 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2945 Operand *SrcT = Select->getTrueOperand();
2946 Operand *SrcF = Select->getFalseOperand();
2947 Variable *SelectDest = Select->getDest();
2948 typename Traits::Insts::Label *LabelFalse =
2949 Traits::Insts::Label::create(Func, this);
2950 typename Traits::Insts::Label *LabelTrue =
2951 Traits::Insts::Label::create(Func, this);
2952 lowerMove(SelectDest, SrcT, false);
2953 _cmp(Src0HiRM, Src1HiRI);
2954 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2955 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2956 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2957 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2958 _cmp(Src0LoRM, Src1LoRI);
2959 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2960 Context.insert(LabelFalse);
2961 static constexpr bool IsRedefinition = true;
2962 lowerMove(SelectDest, SrcF, IsRedefinition);
2963 Context.insert(LabelTrue);
2964 } else {
2965 llvm::report_fatal_error("Unexpected consumer type");
2899 } 2966 }
2900 } 2967 }
2901 2968
2902 template <class Machine> 2969 template <class Machine>
2903 void TargetX86Base<Machine>::setccOrBr(typename Traits::Cond::BrCond Condition, 2970 void TargetX86Base<Machine>::setccOrConsumer(
2904 Variable *Dest, const InstBr *Br) { 2971 typename Traits::Cond::BrCond Condition, Variable *Dest,
2905 if (Br == nullptr) { 2972 const Inst *Consumer) {
2973 if (Consumer == nullptr) {
2906 _setcc(Dest, Condition); 2974 _setcc(Dest, Condition);
2975 } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2976 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse());
2977 } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
2978 Operand *SrcT = Select->getTrueOperand();
2979 Operand *SrcF = Select->getFalseOperand();
2980 Variable *SelectDest = Select->getDest();
2981 lowerSelectMove(SelectDest, Condition, SrcT, SrcF);
2907 } else { 2982 } else {
2908 _br(Condition, Br->getTargetTrue(), Br->getTargetFalse()); 2983 llvm::report_fatal_error("Unexpected consumer type");
2909 } 2984 }
2910 } 2985 }
2911 2986
2912 template <class Machine> 2987 template <class Machine>
2913 void TargetX86Base<Machine>::movOrBr(bool IcmpResult, Variable *Dest, 2988 void TargetX86Base<Machine>::movOrConsumer(bool IcmpResult, Variable *Dest,
2914 const InstBr *Br) { 2989 const Inst *Consumer) {
2915 if (Br == nullptr) { 2990 if (Consumer == nullptr) {
2916 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 2991 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2917 } else { 2992 } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
2918 // TODO(sehr,stichnot): This could be done with a single unconditional 2993 // TODO(sehr,stichnot): This could be done with a single unconditional
2919 // branch instruction, but subzero doesn't know how to handle the resulting 2994 // branch instruction, but subzero doesn't know how to handle the resulting
2920 // control flow graph changes now. Make it do so to eliminate mov and cmp. 2995 // control flow graph changes now. Make it do so to eliminate mov and cmp.
2921 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 2996 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2922 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); 2997 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
2923 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 2998 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
2999 } else if (const auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {
3000 Operand *Src = nullptr;
3001 if (IcmpResult) {
3002 Src = legalize(Select->getTrueOperand(), Legal_Reg | Legal_Imm);
3003 } else {
3004 Src = legalize(Select->getFalseOperand(), Legal_Reg | Legal_Imm);
3005 }
3006 Variable *SelectDest = Select->getDest();
3007 lowerMove(SelectDest, Src, false);
3008 } else {
3009 llvm::report_fatal_error("Unexpected consumer type");
2924 } 3010 }
2925 } 3011 }
2926 3012
2927 template <class Machine> 3013 template <class Machine>
2928 void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, 3014 void TargetX86Base<Machine>::lowerArithAndConsumer(const InstArithmetic *Arith,
2929 const InstBr *Br) { 3015 const Inst *Consumer) {
2930 Variable *T = nullptr; 3016 Variable *T = nullptr;
2931 Operand *Src0 = legalize(Arith->getSrc(0)); 3017 Operand *Src0 = legalize(Arith->getSrc(0));
2932 Operand *Src1 = legalize(Arith->getSrc(1)); 3018 Operand *Src1 = legalize(Arith->getSrc(1));
2933 Variable *Dest = Arith->getDest(); 3019 Variable *Dest = Arith->getDest();
2934 switch (Arith->getOp()) { 3020 switch (Arith->getOp()) {
2935 default: 3021 default:
2936 llvm_unreachable("arithmetic operator not AND or OR"); 3022 llvm_unreachable("arithmetic operator not AND or OR");
2937 break; 3023 break;
2938 case InstArithmetic::And: 3024 case InstArithmetic::And:
2939 _mov(T, Src0); 3025 _mov(T, Src0);
2940 // Test cannot have an address in the second position. Since T is 3026 // Test cannot have an address in the second position. Since T is
2941 // guaranteed to be a register and Src1 could be a memory load, ensure 3027 // guaranteed to be a register and Src1 could be a memory load, ensure
2942 // that the second argument is a register. 3028 // that the second argument is a register.
2943 if (llvm::isa<Constant>(Src1)) 3029 if (llvm::isa<Constant>(Src1))
2944 _test(T, Src1); 3030 _test(T, Src1);
2945 else 3031 else
2946 _test(Src1, T); 3032 _test(Src1, T);
2947 break; 3033 break;
2948 case InstArithmetic::Or: 3034 case InstArithmetic::Or:
2949 _mov(T, Src0); 3035 _mov(T, Src0);
2950 _or(T, Src1); 3036 _or(T, Src1);
2951 break; 3037 break;
2952 } 3038 }
2953 Context.insert(InstFakeUse::create(Func, T)); 3039
2954 Context.insert(InstFakeDef::create(Func, Dest)); 3040 if (Consumer == nullptr) {
2955 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3041 llvm::report_fatal_error("Expected a consumer instruction");
3042 } else if (const auto *Br = llvm::dyn_cast<InstBr>(Consumer)) {
3043 Context.insert(InstFakeUse::create(Func, T));
3044 Context.insert(InstFakeDef::create(Func, Dest));
3045 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3046 } else {
3047 llvm::report_fatal_error("Unexpected consumer type");
3048 }
2956 } 3049 }
2957 3050
2958 template <class Machine> 3051 template <class Machine>
2959 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 3052 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2960 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3053 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2961 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3054 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2962 ConstantInteger32 *ElementIndex = 3055 ConstantInteger32 *ElementIndex =
2963 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3056 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2964 // Only constant indices are allowed in PNaCl IR. 3057 // Only constant indices are allowed in PNaCl IR.
2965 assert(ElementIndex); 3058 assert(ElementIndex);
(...skipping 1644 matching lines...) Expand 10 before | Expand all | Expand 10 after
4610 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); 4703 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
4611 } 4704 }
4612 } 4705 }
4613 4706
4614 template <class Machine> 4707 template <class Machine>
4615 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { 4708 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4616 Func->setError("Phi found in regular instruction list"); 4709 Func->setError("Phi found in regular instruction list");
4617 } 4710 }
4618 4711
4619 template <class Machine> 4712 template <class Machine>
4620 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { 4713 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Select) {
4621 Variable *Dest = Inst->getDest(); 4714 Variable *Dest = Select->getDest();
4622 Type DestTy = Dest->getType();
4623 Operand *SrcT = Inst->getTrueOperand();
4624 Operand *SrcF = Inst->getFalseOperand();
4625 Operand *Condition = Inst->getCondition();
4626 4715
4627 if (isVectorType(DestTy)) { 4716 if (isVectorType(Dest->getType())) {
4628 Type SrcTy = SrcT->getType(); 4717 lowerSelectVector(Select);
4629 Variable *T = makeReg(SrcTy);
4630 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4631 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4632 if (InstructionSet >= Traits::SSE4_1) {
4633 // TODO(wala): If the condition operand is a constant, use blendps or
4634 // pblendw.
4635 //
4636 // Use blendvps or pblendvb to implement select.
4637 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4638 SrcTy == IceType_v4f32) {
4639 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4640 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4641 _movp(xmm0, ConditionRM);
4642 _psll(xmm0, Ctx->getConstantInt8(31));
4643 _movp(T, SrcFRM);
4644 _blendvps(T, SrcTRM, xmm0);
4645 _movp(Dest, T);
4646 } else {
4647 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4648 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4649 : IceType_v16i8;
4650 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
4651 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4652 _movp(T, SrcFRM);
4653 _pblendvb(T, SrcTRM, xmm0);
4654 _movp(Dest, T);
4655 }
4656 return;
4657 }
4658 // Lower select without Traits::SSE4.1:
4659 // a=d?b:c ==>
4660 // if elementtype(d) != i1:
4661 // d=sext(d);
4662 // a=(b&d)|(c&~d);
4663 Variable *T2 = makeReg(SrcTy);
4664 // Sign extend the condition operand if applicable.
4665 if (SrcTy == IceType_v4f32) {
4666 // The sext operation takes only integer arguments.
4667 Variable *T3 = Func->makeVariable(IceType_v4i32);
4668 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4669 _movp(T, T3);
4670 } else if (typeElementType(SrcTy) != IceType_i1) {
4671 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4672 } else {
4673 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4674 _movp(T, ConditionRM);
4675 }
4676 _movp(T2, T);
4677 _pand(T, SrcTRM);
4678 _pandn(T2, SrcFRM);
4679 _por(T, T2);
4680 _movp(Dest, T);
4681
4682 return; 4718 return;
4683 } 4719 }
4684 4720
4685 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; 4721 Operand *Condition = Select->getCondition();
4686 Operand *CmpOpnd0 = nullptr;
4687 Operand *CmpOpnd1 = nullptr;
4688 // Handle folding opportunities. 4722 // Handle folding opportunities.
4689 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4723 if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4690 assert(Producer->isDeleted()); 4724 assert(Producer->isDeleted());
4691 switch (BoolFolding::getProducerKind(Producer)) { 4725 switch (BoolFolding::getProducerKind(Producer)) {
4692 default: 4726 default:
4693 break; 4727 break;
4694 case BoolFolding::PK_Icmp32: { 4728 case BoolFolding::PK_Icmp32:
4695 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4729 case BoolFolding::PK_Icmp64: {
4696 Cond = Traits::getIcmp32Mapping(Cmp->getCondition()); 4730 lowerIcmpAndConsumer(llvm::dyn_cast<InstIcmp>(Producer), Select);
4697 CmpOpnd1 = legalize(Producer->getSrc(1)); 4731 return;
4698 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); 4732 }
4699 } break; 4733 case BoolFolding::PK_Fcmp: {
4734 lowerFcmpAndConsumer(llvm::dyn_cast<InstFcmp>(Producer), Select);
4735 return;
4736 }
4700 } 4737 }
4701 } 4738 }
4702 if (CmpOpnd0 == nullptr) {
4703 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4704 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4705 }
4706 assert(CmpOpnd0);
4707 assert(CmpOpnd1);
4708 4739
4709 _cmp(CmpOpnd0, CmpOpnd1); 4740 Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);
4741 Operand *Zero = Ctx->getConstantZero(IceType_i32);
4742 _cmp(CmpResult, Zero);
4743 Operand *SrcT = Select->getTrueOperand();
4744 Operand *SrcF = Select->getFalseOperand();
4745 const typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
4746 lowerSelectMove(Dest, Cond, SrcT, SrcF);
4747 }
4748
4749 template <class Machine>
4750 void TargetX86Base<Machine>::lowerSelectMove(Variable *Dest,
4751 typename Traits::Cond::BrCond Cond,
4752 Operand *SrcT, Operand *SrcF) {
4753 Type DestTy = Dest->getType();
4710 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4754 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4711 // The cmov instruction doesn't allow 8-bit or FP operands, so we need 4755 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
4712 // explicit control flow. 4756 // explicit control flow.
4713 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4757 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4714 typename Traits::Insts::Label *Label = 4758 typename Traits::Insts::Label *Label =
4715 Traits::Insts::Label::create(Func, this); 4759 Traits::Insts::Label::create(Func, this);
4716 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4760 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4717 _mov(Dest, SrcT); 4761 _mov(Dest, SrcT);
4718 _br(Cond, Label); 4762 _br(Cond, Label);
4719 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4763 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4720 _mov_redefined(Dest, SrcF); 4764 _mov_redefined(Dest, SrcF);
4721 Context.insert(Label); 4765 Context.insert(Label);
4722 return; 4766 return;
4723 } 4767 }
4724 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4768 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4725 // But if SrcT is immediate, we might be able to do better, as the cmov 4769 // But if SrcT is immediate, we might be able to do better, as the cmov
4726 // instruction doesn't allow an immediate operand: 4770 // instruction doesn't allow an immediate operand:
4727 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4771 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4728 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4772 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4729 std::swap(SrcT, SrcF); 4773 std::swap(SrcT, SrcF);
4730 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4774 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4731 } 4775 }
4732 if (!Traits::Is64Bit && DestTy == IceType_i64) { 4776 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4733 SrcT = legalizeUndef(SrcT); 4777 SrcT = legalizeUndef(SrcT);
4734 SrcF = legalizeUndef(SrcF); 4778 SrcF = legalizeUndef(SrcF);
4735 // Set the low portion. 4779 // Set the low portion.
4736 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4780 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4737 Variable *TLo = nullptr; 4781 lowerSelectIntMove(DestLo, Cond, loOperand(SrcT), loOperand(SrcF));
4738 Operand *SrcFLo = legalize(loOperand(SrcF));
4739 _mov(TLo, SrcFLo);
4740 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4741 _cmov(TLo, SrcTLo, Cond);
4742 _mov(DestLo, TLo);
4743 // Set the high portion. 4782 // Set the high portion.
4744 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4783 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4745 Variable *THi = nullptr; 4784 lowerSelectIntMove(DestHi, Cond, hiOperand(SrcT), hiOperand(SrcF));
4746 Operand *SrcFHi = legalize(hiOperand(SrcF));
4747 _mov(THi, SrcFHi);
4748 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4749 _cmov(THi, SrcTHi, Cond);
4750 _mov(DestHi, THi);
4751 return; 4785 return;
4752 } 4786 }
4753 4787
4754 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || 4788 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4755 (Traits::Is64Bit && DestTy == IceType_i64)); 4789 (Traits::Is64Bit && DestTy == IceType_i64));
4790 lowerSelectIntMove(Dest, Cond, SrcT, SrcF);
4791 }
4792
4793 template <class Machine>
4794 void TargetX86Base<Machine>::lowerSelectIntMove(
4795 Variable *Dest, typename Traits::Cond::BrCond Cond, Operand *SrcT,
4796 Operand *SrcF) {
4756 Variable *T = nullptr; 4797 Variable *T = nullptr;
4757 SrcF = legalize(SrcF); 4798 SrcF = legalize(SrcF);
4758 _mov(T, SrcF); 4799 _mov(T, SrcF);
4759 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4800 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4760 _cmov(T, SrcT, Cond); 4801 _cmov(T, SrcT, Cond);
4761 _mov(Dest, T); 4802 _mov(Dest, T);
4762 } 4803 }
4763 4804
4764 template <class Machine> 4805 template <class Machine>
4806 void TargetX86Base<Machine>::lowerMove(Variable *Dest, Operand *Src,
John 2015/12/07 13:07:44 What do you think about asserting !Src->isRemateri
sehr 2015/12/15 20:45:44 Done.
4807 bool IsRedefinition) {
4808 assert(Dest->getType() == Src->getType());
4809 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
4810 Src = legalize(Src);
4811 Operand *SrcLo = loOperand(Src);
4812 Operand *SrcHi = hiOperand(Src);
4813 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4814 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4815 Variable *T_Lo = nullptr, *T_Hi = nullptr;
4816 _mov(T_Lo, SrcLo);
4817 if (IsRedefinition) {
John 2015/12/07 13:07:44 I would personally do _mov(DestLo, T_Lo); if (IsR
Jim Stichnoth 2015/12/08 18:55:59 I agree with John's preference on this. Thinking
sehr 2015/12/15 20:45:44 Followed Jim's extension of this suggestion.
sehr 2015/12/15 20:45:44 Done.
4818 _mov_redefined(DestLo, T_Lo);
4819 } else {
4820 _mov(DestLo, T_Lo);
4821 }
4822 _mov(T_Hi, SrcHi);
4823 if (IsRedefinition) {
4824 _mov_redefined(DestHi, T_Hi);
4825 } else {
4826 _mov(DestHi, T_Hi);
4827 }
4828 } else {
4829 Operand *SrcLegal;
4830 if (Dest->hasReg()) {
4831 // If Dest already has a physical register, then only basic legalization
4832 // is needed, as the source operand can be a register, immediate, or
4833 // memory.
4834 SrcLegal = legalize(Src, Legal_Reg, Dest->getRegNum());
4835 } else {
4836 // If Dest could be a stack operand, then the source must be a physical
4837 // register or a scalar integer immediate.
4838 SrcLegal = legalize(Src, Legal_Reg | Legal_Imm);
4839 }
4840 if (isVectorType(Dest->getType())) {
4841 if (IsRedefinition) {
4842 _movp_redefined(Dest, SrcLegal);
4843 } else {
4844 _movp(Dest, SrcLegal);
4845 }
4846 } else {
4847 if (IsRedefinition) {
4848 _mov_redefined(Dest, SrcLegal);
4849 } else {
4850 _mov(Dest, SrcLegal);
4851 }
4852 }
4853 }
4854 }
4855
4856 template <class Machine>
4857 bool TargetX86Base<Machine>::lowerOptimizeFcmpSelect(const InstFcmp *Fcmp,
4858 const InstSelect *Select) {
4859 Operand *CmpSrc0 = Fcmp->getSrc(0);
4860 Operand *CmpSrc1 = Fcmp->getSrc(1);
4861 Operand *SelectSrcT = Select->getTrueOperand();
4862 Operand *SelectSrcF = Select->getFalseOperand();
4863
4864 if (CmpSrc0->getType() != SelectSrcT->getType())
4865 return false;
4866
4867 // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.
4868 InstFcmp::FCond Condition = Fcmp->getCondition();
4869 switch (Condition) {
4870 default:
4871 return false;
4872 case InstFcmp::True:
4873 case InstFcmp::False:
4874 case InstFcmp::Ogt:
4875 case InstFcmp::Olt:
4876 (void)CmpSrc0;
4877 (void)CmpSrc1;
4878 (void)SelectSrcT;
4879 (void)SelectSrcF;
4880 break;
4881 }
4882 return false;
4883 }
4884
4885 template <class Machine>
4886 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Icmp) {
4887 Variable *Dest = Icmp->getDest();
4888 if (isVectorType(Dest->getType())) {
4889 lowerIcmpVector(Icmp);
4890 } else {
4891 constexpr Inst *Consumer = nullptr;
4892 lowerIcmpAndConsumer(Icmp, Consumer);
4893 }
4894 }
4895
4896 template <class Machine>
4897 void TargetX86Base<Machine>::lowerSelectVector(const InstSelect *Inst) {
4898 Variable *Dest = Inst->getDest();
4899 Type DestTy = Dest->getType();
4900 Operand *SrcT = Inst->getTrueOperand();
4901 Operand *SrcF = Inst->getFalseOperand();
4902 Operand *Condition = Inst->getCondition();
4903
4904 if (!isVectorType(DestTy))
4905 llvm::report_fatal_error("Expected a vector select");
4906
4907 Type SrcTy = SrcT->getType();
4908 Variable *T = makeReg(SrcTy);
4909 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4910 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4911 if (InstructionSet >= Traits::SSE4_1) {
4912 // TODO(wala): If the condition operand is a constant, use blendps or
4913 // pblendw.
4914 //
4915 // Use blendvps or pblendvb to implement select.
4916 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4917 SrcTy == IceType_v4f32) {
4918 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4919 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4920 _movp(xmm0, ConditionRM);
4921 _psll(xmm0, Ctx->getConstantInt8(31));
4922 _movp(T, SrcFRM);
4923 _blendvps(T, SrcTRM, xmm0);
4924 _movp(Dest, T);
4925 } else {
4926 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4927 Type SignExtTy =
4928 Condition->getType() == IceType_v8i1 ? IceType_v8i16 : IceType_v16i8;
4929 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
4930 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4931 _movp(T, SrcFRM);
4932 _pblendvb(T, SrcTRM, xmm0);
4933 _movp(Dest, T);
4934 }
4935 return;
4936 }
4937 // Lower select without SSE4.1:
4938 // a=d?b:c ==>
4939 // if elementtype(d) != i1:
4940 // d=sext(d);
4941 // a=(b&d)|(c&~d);
4942 Variable *T2 = makeReg(SrcTy);
4943 // Sign extend the condition operand if applicable.
4944 if (SrcTy == IceType_v4f32) {
4945 // The sext operation takes only integer arguments.
4946 Variable *T3 = Func->makeVariable(IceType_v4i32);
4947 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4948 _movp(T, T3);
4949 } else if (typeElementType(SrcTy) != IceType_i1) {
4950 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4951 } else {
4952 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4953 _movp(T, ConditionRM);
4954 }
4955 _movp(T2, T);
4956 _pand(T, SrcTRM);
4957 _pandn(T2, SrcFRM);
4958 _por(T, T2);
4959 _movp(Dest, T);
4960
4961 return;
4962 }
4963
4964 template <class Machine>
4765 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4965 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4766 Operand *Value = Inst->getData(); 4966 Operand *Value = Inst->getData();
4767 Operand *Addr = Inst->getAddr(); 4967 Operand *Addr = Inst->getAddr();
4768 typename Traits::X86OperandMem *NewAddr = 4968 typename Traits::X86OperandMem *NewAddr =
4769 formMemoryOperand(Addr, Value->getType()); 4969 formMemoryOperand(Addr, Value->getType());
4770 doMockBoundsCheck(NewAddr); 4970 doMockBoundsCheck(NewAddr);
4771 Type Ty = NewAddr->getType(); 4971 Type Ty = NewAddr->getType();
4772 4972
4773 if (!Traits::Is64Bit && Ty == IceType_i64) { 4973 if (!Traits::Is64Bit && Ty == IceType_i64) {
4774 Value = legalizeUndef(Value); 4974 Value = legalizeUndef(Value);
(...skipping 738 matching lines...) Expand 10 before | Expand all | Expand 10 after
5513 case IceType_i8: 5713 case IceType_i8:
5514 case IceType_i16: 5714 case IceType_i16:
5515 case IceType_i32: 5715 case IceType_i32:
5516 case IceType_i64: 5716 case IceType_i64:
5517 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 5717 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
5518 _mov(Reg, Ctx->getConstantZero(Ty)); 5718 _mov(Reg, Ctx->getConstantZero(Ty));
5519 break; 5719 break;
5520 case IceType_f32: 5720 case IceType_f32:
5521 case IceType_f64: 5721 case IceType_f64:
5522 Context.insert(InstFakeDef::create(Func, Reg)); 5722 Context.insert(InstFakeDef::create(Func, Reg));
5523 // TODO(stichnot): Use xorps/xorpd instead of pxor. 5723 // TODO(stichnot): Use xorps/xorpd instead of pxor.
Jim Stichnoth 2015/12/08 18:55:59 TODONE?
sehr 2015/12/15 20:45:44 TODONEDONE.
5524 _pxor(Reg, Reg); 5724 _xorps(Reg, Reg);
5525 break; 5725 break;
5526 default: 5726 default:
5527 // All vector types use the same pxor instruction. 5727 // All vector types use the same pxor instruction.
5528 assert(isVectorType(Ty)); 5728 assert(isVectorType(Ty));
5529 Context.insert(InstFakeDef::create(Func, Reg)); 5729 Context.insert(InstFakeDef::create(Func, Reg));
5530 _pxor(Reg, Reg); 5730 _pxor(Reg, Reg);
5531 break; 5731 break;
5532 } 5732 }
5533 return Reg; 5733 return Reg;
5534 } 5734 }
(...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after
6206 } 6406 }
6207 // the offset is not eligible for blinding or pooling, return the original 6407 // the offset is not eligible for blinding or pooling, return the original
6208 // mem operand 6408 // mem operand
6209 return MemOperand; 6409 return MemOperand;
6210 } 6410 }
6211 6411
6212 } // end of namespace X86Internal 6412 } // end of namespace X86Internal
6213 } // end of namespace Ice 6413 } // end of namespace Ice
6214 6414
6215 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6415 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698