Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1141213004: Subzero: Fold icmp into br/select lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Comments and cleanup Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 // entries in case the high-level table has extra entries. 253 // entries in case the high-level table has extra entries.
254 #define X(tag, size, align, elts, elty, str) \ 254 #define X(tag, size, align, elts, elty, str) \
255 static_assert(_table1_##tag == _table2_##tag, \ 255 static_assert(_table1_##tag == _table2_##tag, \
256 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); 256 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
257 ICETYPE_TABLE 257 ICETYPE_TABLE
258 #undef X 258 #undef X
259 } // end of namespace dummy3 259 } // end of namespace dummy3
260 260
261 } // end of anonymous namespace 261 } // end of anonymous namespace
262 262
263 BoolFoldingEntry::BoolFoldingEntry(Inst *I)
264 : Var(I->getDest()), Instr(I),
265 IsComplex(BoolFolding::hasComplexLowering(I)), IsLiveOut(true),
266 NumUses(0) {}
267
268 BoolFoldingProducerKind BoolFolding::getProducerKind(const Inst *Instr) {
269 if (llvm::isa<InstIcmp>(Instr)) {
270 if (Instr->getSrc(0)->getType() != IceType_i64)
271 return PK_Icmp32;
272 return PK_None; // TODO(stichnot): actually PK_Icmp64;
273 }
274 return PK_None; // TODO(stichnot): remove this
275
276 if (llvm::isa<InstFcmp>(Instr))
277 return PK_Fcmp;
278 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
279 switch (Cast->getCastKind()) {
280 default:
281 return PK_None;
282 case InstCast::Trunc:
283 return PK_Trunc;
284 }
285 }
286 return PK_None;
287 }
288
289 BoolFoldingConsumerKind BoolFolding::getConsumerKind(const Inst *Instr) {
290 if (llvm::isa<InstBr>(Instr))
291 return CK_Br;
292 if (llvm::isa<InstSelect>(Instr))
293 return CK_Select;
294 return CK_None; // TODO(stichnot): remove this
295
296 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
297 switch (Cast->getCastKind()) {
298 default:
299 return CK_None;
300 case InstCast::Sext:
301 return CK_Sext;
302 case InstCast::Zext:
303 return CK_Zext;
304 }
305 }
306 return CK_None;
307 }
308
309 // Returns true if the producing instruction has a "complex" lowering
310 // sequence. This generally that its lowering sequence requires more
jvoung (off chromium) 2015/05/15 21:48:20 "generally that" -> "generally means that", or som
Jim Stichnoth 2015/05/16 17:32:50 Done.
311 // than one conditional branch, namely 64-bit integer compares and
312 // some floating-point compares. When this is true, and there is more
313 // than one consumer, we prefer to disable the folding optimization
314 // because it minimizes branches.
315 bool BoolFolding::hasComplexLowering(const Inst *Instr) {
316 switch (getProducerKind(Instr)) {
317 default:
318 return false;
319 case PK_Icmp64:
320 return true;
321 case PK_Fcmp:
322 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
323 CondX86::Br_None;
324 }
325 }
326
327 void BoolFolding::init(CfgNode *Node) {
328 Producers.clear();
329 for (Inst &Instr : Node->getInsts()) {
330 // Check whether Instr is a valid producer.
331 Variable *Var = Instr.getDest();
332 if (!Instr.isDeleted() // only consider non-deleted instructions
333 && Var // only instructions with an actual dest var
334 && Var->getType() == IceType_i1 // only bool-type dest vars
335 && getProducerKind(&Instr) != PK_None) { // white-listed instructions
336 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
337 }
338 // Check each src variable against the map.
339 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
340 Operand *Src = Instr.getSrc(I);
341 SizeT NumVars = Src->getNumVars();
342 for (SizeT J = 0; J < NumVars; ++J) {
343 const Variable *Var = Src->getVar(J);
344 SizeT VarNum = Var->getIndex();
345 if (containsValid(VarNum)) {
346 if (I != 0 // All valid consumers use Var as the first source operand
347 || getConsumerKind(&Instr) == CK_None // must be white-listed
348 || (Producers[VarNum].IsComplex && // complex can't be multi-use
349 Producers[VarNum].NumUses > 0)) {
350 setInvalid(VarNum);
351 continue;
352 }
353 ++Producers[VarNum].NumUses;
354 if (Instr.isLastUse(Var)) {
355 Producers[VarNum].IsLiveOut = false;
356 }
357 }
358 }
359 }
360 }
361 for (auto &I : Producers) {
362 // Ignore entries previously marked invalid.
363 if (I.second.Instr == nullptr)
364 continue;
365 // Disable the producer if its dest may be live beyond this block.
366 if (I.second.IsLiveOut) {
367 setInvalid(I.first);
368 continue;
369 }
370 // Mark as "dead" rather than outright deleting. This is so that
371 // other peephole style optimizations during or before lowering
372 // have access to this instruction in undeleted form. See for
373 // example tryOptimizedCmpxchgCmpBr().
374 I.second.Instr->setDead();
375 }
376 }
377
378 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
379 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
380 if (Var == nullptr)
381 return nullptr;
382 SizeT VarNum = Var->getIndex();
383 if (Producers.find(VarNum) == Producers.end())
jvoung (off chromium) 2015/05/15 21:48:20 similar -- if two lookups is a concern
Jim Stichnoth 2015/05/16 17:32:50 Done.
384 return nullptr;
385 return Producers.at(VarNum).Instr;
386 }
387
388 void BoolFolding::dump(const Cfg *Func) const {
389 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
390 return;
391 OstreamLocker L(Func->getContext());
392 Ostream &Str = Func->getContext()->getStrDump();
393 for (auto &I : Producers) {
394 if (I.second.Instr == nullptr)
395 continue;
396 Str << "Found foldable producer:\n ";
397 I.second.Instr->dump(Func);
398 Str << "\n";
399 }
400 }
401
402 void TargetX8632::initNodeForLowering(CfgNode *Node) {
403 FoldingInfo.init(Node);
404 FoldingInfo.dump(Func);
405 }
406
263 TargetX8632::TargetX8632(Cfg *Func) 407 TargetX8632::TargetX8632(Cfg *Func)
264 : TargetLowering(Func), 408 : TargetLowering(Func),
265 InstructionSet(static_cast<X86InstructionSet>( 409 InstructionSet(static_cast<X86InstructionSet>(
266 Func->getContext()->getFlags().getTargetInstructionSet() - 410 Func->getContext()->getFlags().getTargetInstructionSet() -
267 TargetInstructionSet::X86InstructionSet_Begin)), 411 TargetInstructionSet::X86InstructionSet_Begin)),
268 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), 412 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
269 SpillAreaSizeBytes(0) { 413 SpillAreaSizeBytes(0) {
270 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == 414 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
271 (TargetInstructionSet::X86InstructionSet_End - 415 (TargetInstructionSet::X86InstructionSet_End -
272 TargetInstructionSet::X86InstructionSet_Begin), 416 TargetInstructionSet::X86InstructionSet_Begin),
(...skipping 1439 matching lines...) Expand 10 before | Expand all | Expand 10 after
1712 if (isVectorType(Dest->getType())) 1856 if (isVectorType(Dest->getType()))
1713 _movp(Dest, RI); 1857 _movp(Dest, RI);
1714 else 1858 else
1715 _mov(Dest, RI); 1859 _mov(Dest, RI);
1716 } 1860 }
1717 } 1861 }
1718 1862
1719 void TargetX8632::lowerBr(const InstBr *Inst) { 1863 void TargetX8632::lowerBr(const InstBr *Inst) {
1720 if (Inst->isUnconditional()) { 1864 if (Inst->isUnconditional()) {
1721 _br(Inst->getTargetUnconditional()); 1865 _br(Inst->getTargetUnconditional());
1722 } else { 1866 return;
1723 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
1724 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1725 _cmp(Src0, Zero);
1726 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1727 } 1867 }
1868 Operand *Cond = Inst->getCondition();
1869
1870 // Handle folding opportunities.
1871 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1872 assert(Producer->isDeleted());
1873 switch (BoolFolding::getProducerKind(Producer)) {
1874 default:
1875 break;
1876 case PK_Icmp32: {
1877 // TODO(stichnot): Refactor similarities between this block and
1878 // the corresponding code in lowerIcmp().
jvoung (off chromium) 2015/05/15 21:48:19 As a suggestion, maybe factor out the IsSrc1ImmOrR
Jim Stichnoth 2015/05/16 17:32:50 This is good. I created a new legalize() routine
1879 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
1880 Operand *Src0 = legalize(Producer->getSrc(0));
1881 Operand *Src1 = legalize(Producer->getSrc(1));
1882 bool IsSrc1ImmOrReg = false;
1883 if (llvm::isa<Constant>(Src1)) {
1884 IsSrc1ImmOrReg = true;
1885 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
1886 if (Var->hasReg())
1887 IsSrc1ImmOrReg = true;
1888 }
1889 Operand *Src0RM =
1890 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
1891 _cmp(Src0RM, Src1);
1892 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
1893 Inst->getTargetFalse());
1894 return;
1895 } break;
jvoung (off chromium) 2015/05/15 21:48:19 nit: no need for break; after return;? (okay if t
Jim Stichnoth 2015/05/16 17:32:50 You're right - the "break" was left over from my s
1896 }
1897 }
1898
1899 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1900 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1901 _cmp(Src0, Zero);
1902 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1728 } 1903 }
1729 1904
1730 void TargetX8632::lowerCall(const InstCall *Instr) { 1905 void TargetX8632::lowerCall(const InstCall *Instr) {
1731 // x86-32 calling convention: 1906 // x86-32 calling convention:
1732 // 1907 //
1733 // * At the point before the call, the stack must be aligned to 16 1908 // * At the point before the call, the stack must be aligned to 16
1734 // bytes. 1909 // bytes.
1735 // 1910 //
1736 // * The first four arguments of vector type, regardless of their 1911 // * The first four arguments of vector type, regardless of their
1737 // position relative to the other arguments in the argument list, are 1912 // position relative to the other arguments in the argument list, are
(...skipping 938 matching lines...) Expand 10 before | Expand all | Expand 10 after
2676 _movp(Dest, T); 2851 _movp(Dest, T);
2677 eliminateNextVectorSextInstruction(Dest); 2852 eliminateNextVectorSextInstruction(Dest);
2678 return; 2853 return;
2679 } 2854 }
2680 2855
2681 // If Src1 is an immediate, or known to be a physical register, we can 2856 // If Src1 is an immediate, or known to be a physical register, we can
2682 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2857 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2683 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2858 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2684 // the physical register, but unfortunately we have to commit to one or 2859 // the physical register, but unfortunately we have to commit to one or
2685 // the other before register allocation.) 2860 // the other before register allocation.)
2686 bool IsSrc1ImmOrReg = false; 2861 bool IsSrc1ImmOrReg = false;
jvoung (off chromium) 2015/05/15 21:48:20 Might be able to move this down to after the i64 c
Jim Stichnoth 2015/05/16 17:32:50 Done.
2687 if (llvm::isa<Constant>(Src1)) { 2862 if (llvm::isa<Constant>(Src1)) {
2688 IsSrc1ImmOrReg = true; 2863 IsSrc1ImmOrReg = true;
2689 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 2864 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
2690 if (Var->hasReg()) 2865 if (Var->hasReg())
2691 IsSrc1ImmOrReg = true; 2866 IsSrc1ImmOrReg = true;
2692 } 2867 }
2693 2868
2694 // Try to fuse a compare immediately followed by a conditional branch. This
2695 // is possible when the compare dest and the branch source operands are the
2696 // same, and are their only uses. TODO: implement this optimization for i64.
2697 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) {
2698 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() &&
2699 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) {
2700 NextBr->setDeleted();
2701 Operand *Src0RM =
2702 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
2703 _cmp(Src0RM, Src1);
2704 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2705 NextBr->getTargetFalse());
2706 // Skip over the following branch instruction.
2707 Context.advanceNext();
2708 return;
2709 }
2710 }
2711
2712 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2869 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2713 if (Src0->getType() == IceType_i64) { 2870 if (Src0->getType() == IceType_i64) {
2714 InstIcmp::ICond Condition = Inst->getCondition(); 2871 InstIcmp::ICond Condition = Inst->getCondition();
2715 size_t Index = static_cast<size_t>(Condition); 2872 size_t Index = static_cast<size_t>(Condition);
2716 assert(Index < TableIcmp64Size); 2873 assert(Index < TableIcmp64Size);
2717 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2874 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2718 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2875 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2719 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2876 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2720 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2877 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2721 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2878 Constant *Zero = Ctx->getConstantZero(IceType_i32);
(...skipping 1272 matching lines...) Expand 10 before | Expand all | Expand 10 after
3994 } 4151 }
3995 _movp(T2, T); 4152 _movp(T2, T);
3996 _pand(T, SrcTRM); 4153 _pand(T, SrcTRM);
3997 _pandn(T2, SrcFRM); 4154 _pandn(T2, SrcFRM);
3998 _por(T, T2); 4155 _por(T, T2);
3999 _movp(Dest, T); 4156 _movp(Dest, T);
4000 4157
4001 return; 4158 return;
4002 } 4159 }
4003 4160
4004 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: 4161 // Handle folding opportunities.
4162 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4163 assert(Producer->isDeleted());
4164 switch (BoolFolding::getProducerKind(Producer)) {
4165 default:
4166 break;
4167 case PK_Icmp32: {
4168 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4169 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
4170 InstX8632Label *Label = InstX8632Label::create(Func, this);
4171 Operand *Src0 = legalize(Producer->getSrc(0));
4172 Operand *Src1 = legalize(Producer->getSrc(1));
4173 bool IsSrc1ImmOrReg = false;
4174 if (llvm::isa<Constant>(Src1)) {
4175 IsSrc1ImmOrReg = true;
4176 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
4177 if (Var->hasReg())
4178 IsSrc1ImmOrReg = true;
4179 }
4180 Operand *Src0RM =
4181 legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
4182 _cmp(Src0RM, Src1);
4183 // This is the same code as below (for both i64 and non-i64),
4184 // except without the _cmp instruction and with a different
4185 // branch condition. TODO(stichnot): refactor.
4186 if (Dest->getType() == IceType_i64) {
4187 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4188 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4189 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
4190 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
4191 _mov(DestLo, SrcLoRI);
4192 _mov(DestHi, SrcHiRI);
4193 _br(getIcmp32Mapping(Cmp->getCondition()), Label);
4194 Operand *SrcFLo = loOperand(SrcF);
4195 Operand *SrcFHi = hiOperand(SrcF);
4196 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm);
4197 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm);
4198 _mov_nonkillable(DestLo, SrcLoRI);
4199 _mov_nonkillable(DestHi, SrcHiRI);
4200 } else {
4201 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4202 _mov(Dest, SrcT);
4203 _br(getIcmp32Mapping(Cmp->getCondition()), Label);
4204 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4205 _mov_nonkillable(Dest, SrcF);
4206 }
4207 Context.insert(Label);
4208 return;
4209 } break;
4210 }
4211 }
4212
4213 // a=d?b:c ==> cmp d,0; a=b; jne L1; a=c; L1:
4005 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4214 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4006 Constant *Zero = Ctx->getConstantZero(IceType_i32); 4215 Constant *Zero = Ctx->getConstantZero(IceType_i32);
4007 InstX8632Label *Label = InstX8632Label::create(Func, this); 4216 InstX8632Label *Label = InstX8632Label::create(Func, this);
4008 4217
4009 if (Dest->getType() == IceType_i64) { 4218 if (Dest->getType() == IceType_i64) {
4010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4219 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4220 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4012 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm); 4221 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm);
4013 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm); 4222 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm);
4014 _cmp(ConditionRM, Zero); 4223 _cmp(ConditionRM, Zero);
(...skipping 834 matching lines...) Expand 10 before | Expand all | Expand 10 after
4849 case FT_Asm: 5058 case FT_Asm:
4850 case FT_Iasm: { 5059 case FT_Iasm: {
4851 OstreamLocker L(Ctx); 5060 OstreamLocker L(Ctx);
4852 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5061 emitConstantPool<PoolTypeConverter<float>>(Ctx);
4853 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5062 emitConstantPool<PoolTypeConverter<double>>(Ctx);
4854 } break; 5063 } break;
4855 } 5064 }
4856 } 5065 }
4857 5066
4858 } // end of namespace Ice 5067 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698