Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(243)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1897243002: Subzero. Rematerializes shufflevector instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTimerTree.def » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 410 matching lines...) Expand 10 before | Expand all | Expand 10 after
421 // code. 421 // code.
422 // TODO(stichnot,ascull): currently only used for regalloc not 422 // TODO(stichnot,ascull): currently only used for regalloc not
423 // expensive high level optimizations which could be focused on potentially 423 // expensive high level optimizations which could be focused on potentially
424 // hot code. 424 // hot code.
425 Func->computeLoopNestDepth(); 425 Func->computeLoopNestDepth();
426 Func->dump("After loop nest depth analysis"); 426 Func->dump("After loop nest depth analysis");
427 427
428 // Address mode optimization. 428 // Address mode optimization.
429 Func->getVMetadata()->init(VMK_SingleDefs); 429 Func->getVMetadata()->init(VMK_SingleDefs);
430 Func->doAddressOpt(); 430 Func->doAddressOpt();
431 Func->materializeVectorShuffles();
431 432
432 // Find read-modify-write opportunities. Do this after address mode 433 // Find read-modify-write opportunities. Do this after address mode
433 // optimization so that doAddressOpt() doesn't need to be applied to RMW 434 // optimization so that doAddressOpt() doesn't need to be applied to RMW
434 // instructions as well. 435 // instructions as well.
435 findRMW(); 436 findRMW();
436 Func->dump("After RMW transform"); 437 Func->dump("After RMW transform");
437 438
438 // Argument lowering 439 // Argument lowering
439 Func->doArgLowering(); 440 Func->doArgLowering();
440 441
(...skipping 5106 matching lines...) Expand 10 before | Expand all | Expand 10 after
5547 // Add a ret instruction even if sandboxing is enabled, because addEpilog 5548 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5548 // explicitly looks for a ret instruction as a marker for where to insert the 5549 // explicitly looks for a ret instruction as a marker for where to insert the
5549 // frame removal instructions. 5550 // frame removal instructions.
5550 _ret(Reg); 5551 _ret(Reg);
5551 // Add a fake use of esp to make sure esp stays alive for the entire 5552 // Add a fake use of esp to make sure esp stays alive for the entire
5552 // function. Otherwise post-call esp adjustments get dead-code eliminated. 5553 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5553 keepEspLiveAtExit(); 5554 keepEspLiveAtExit();
5554 } 5555 }
5555 5556
5556 template <typename TraitsType> 5557 template <typename TraitsType>
5558 void TargetX86Base<TraitsType>::lowerShuffleVector(
5559 const InstShuffleVector *Instr) {
5560 auto *Dest = Instr->getDest();
5561 const Type DestTy = Dest->getType();
5562
5563 auto *T = makeReg(DestTy);
5564
5565 switch (DestTy) {
5566 default:
5567 break;
5568 // TODO(jpp): figure out how to properly lower this without scalarization.
5569 }
5570
5571 // Unoptimized shuffle. Perform a series of inserts and extracts.
5572 Context.insert<InstFakeDef>(T);
5573 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5574 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5575 const SizeT NumElements = typeNumElements(DestTy);
5576 const Type ElementType = typeElementType(DestTy);
5577 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5578 auto *Index = Instr->getIndex(I);
5579 const SizeT Elem = Index->getValue();
5580 auto *ExtElmt = makeReg(ElementType);
5581 if (Elem < NumElements) {
5582 lowerExtractElement(
5583 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5584 } else {
5585 lowerExtractElement(InstExtractElement::create(
5586 Func, ExtElmt, Src1,
5587 Ctx->getConstantInt32(Index->getValue() - NumElements)));
5588 }
5589 auto *NewT = makeReg(DestTy);
5590 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
5591 Ctx->getConstantInt32(I)));
5592 T = NewT;
5593 }
5594 _movp(Dest, T);
5595 }
5596
5597 template <typename TraitsType>
5557 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { 5598 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
5558 Variable *Dest = Select->getDest(); 5599 Variable *Dest = Select->getDest();
5559 5600
5560 if (isVectorType(Dest->getType())) { 5601 if (isVectorType(Dest->getType())) {
5561 lowerSelectVector(Select); 5602 lowerSelectVector(Select);
5562 return; 5603 return;
5563 } 5604 }
5564 5605
5565 Operand *Condition = Select->getCondition(); 5606 Operand *Condition = Select->getCondition();
5566 // Handle folding opportunities. 5607 // Handle folding opportunities.
(...skipping 1856 matching lines...) Expand 10 before | Expand all | Expand 10 after
7423 emitGlobal(*Var, SectionSuffix); 7464 emitGlobal(*Var, SectionSuffix);
7424 } 7465 }
7425 } 7466 }
7426 } break; 7467 } break;
7427 } 7468 }
7428 } 7469 }
7429 } // end of namespace X86NAMESPACE 7470 } // end of namespace X86NAMESPACE
7430 } // end of namespace Ice 7471 } // end of namespace Ice
7431 7472
7432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7473 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTimerTree.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698