Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(383)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1909013002: Subzero. X86. Lowers shufflevector using xmm instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: fixes 80-col Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 5552 matching lines...) Expand 10 before | Expand all | Expand 10 after
5563 } 5563 }
5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog 5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5565 // explicitly looks for a ret instruction as a marker for where to insert the 5565 // explicitly looks for a ret instruction as a marker for where to insert the
5566 // frame removal instructions. 5566 // frame removal instructions.
5567 _ret(Reg); 5567 _ret(Reg);
5568 // Add a fake use of esp to make sure esp stays alive for the entire 5568 // Add a fake use of esp to make sure esp stays alive for the entire
5569 // function. Otherwise post-call esp adjustments get dead-code eliminated. 5569 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5570 keepEspLiveAtExit(); 5570 keepEspLiveAtExit();
5571 } 5571 }
5572 5572
5573 inline int32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
Jim Stichnoth 2016/04/25 21:23:45 I'm wondering whether these SizeT should be explic
John 2016/04/25 22:38:39 These stem from SizeT values -- i.e., operand inde
Jim Stichnoth 2016/04/25 23:09:03 I should have added more to my description above o
John 2016/04/26 11:14:13 Acknowledged.
5574 SizeT Index3) {
5575 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) |
5576 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6);
5577 assert(Mask < 256);
5578 return Mask;
Jim Stichnoth 2016/04/25 21:23:45 Mask is unsigned, but the function returns a signe
John 2016/04/25 22:38:39 Mixing signed and unsigned integer types... what c
5579 }
5580
5581 template <typename TraitsType>
5582 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
5583 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
5584 constexpr SizeT SrcBit = 1 << 2;
5585 assert((Index0 & SrcBit) == (Index1 & SrcBit));
5586 assert((Index0 & SrcBit) == (Index2 & SrcBit));
5587 assert((Index0 & SrcBit) == (Index3 & SrcBit));
5588 (void)SrcBit;
5589
5590 const Type SrcTy = Src->getType();
5591 auto *T = makeReg(SrcTy);
5592 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
5593 auto *Mask =
5594 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5595 _pshufd(T, SrcRM, Mask);
5596 return T;
5597 }
5598
5599 template <typename TraitsType>
5600 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
5601 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
5602 SizeT Index3) {
5603 constexpr SizeT SrcBit = 1 << 2;
5604 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
5605 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
5606 (void)SrcBit;
5607
5608 const Type SrcTy = Src0->getType();
5609 assert(Src1->getType() == SrcTy);
5610 auto *T = makeReg(SrcTy);
5611 auto *Src0R = legalizeToReg(Src0);
5612 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5613 auto *Mask =
5614 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5615 _movp(T, Src0R);
5616 _shufps(T, Src1RM, Mask);
5617 return T;
5618 }
5619
5620 template <typename TraitsType>
5621 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
5622 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
5623 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
5624 Index1, IGNORE_INDEX);
5625 }
5626
5627 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
5628 SizeT Index3) {
5629 constexpr SizeT SrcBit = 1 << 2;
5630 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5631 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5632 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5633 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5634 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5635 }
5636
5573 template <typename TraitsType> 5637 template <typename TraitsType>
5574 void TargetX86Base<TraitsType>::lowerShuffleVector( 5638 void TargetX86Base<TraitsType>::lowerShuffleVector(
5575 const InstShuffleVector *Instr) { 5639 const InstShuffleVector *Instr) {
5576 auto *Dest = Instr->getDest(); 5640 auto *Dest = Instr->getDest();
5577 const Type DestTy = Dest->getType(); 5641 const Type DestTy = Dest->getType();
5642 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5643 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5644 const SizeT NumElements = typeNumElements(DestTy);
5578 5645
5579 auto *T = makeReg(DestTy); 5646 auto *T = makeReg(DestTy);
5580 5647
5581 switch (DestTy) { 5648 switch (DestTy) {
5582 default: 5649 default:
5583 break; 5650 break;
5584 // TODO(jpp): figure out how to properly lower this without scalarization. 5651 // TODO(jpp): figure out how to properly lower the remaining cases without
5652 // scalarization.
5653 case IceType_v4i1:
5654 case IceType_v4i32:
5655 case IceType_v4f32: {
5656 static constexpr SizeT ExpectedNumElements = 4;
5657 assert(ExpectedNumElements == Instr->getNumIndexes());
5658 const SizeT Index0 = Instr->getIndex(0)->getValue();
5659 const SizeT Index1 = Instr->getIndex(1)->getValue();
5660 const SizeT Index2 = Instr->getIndex(2)->getValue();
5661 const SizeT Index3 = Instr->getIndex(3)->getValue();
5662 Variable *T = nullptr;
5663 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
5664 #define CASE_SRCS_IN(S0, S1, S2, S3) \
5665 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
5666 CASE_SRCS_IN(0, 0, 0, 0) : {
5667 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
5668 Index3);
5669 }
5670 break;
5671 CASE_SRCS_IN(0, 0, 0, 1) : {
5672 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5673 Src1, Index3);
5674 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5675 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5676 }
5677 break;
5678 CASE_SRCS_IN(0, 0, 1, 0) : {
5679 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5680 Src0, Index3);
5681 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5682 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5683 }
5684 break;
5685 CASE_SRCS_IN(0, 0, 1, 1) : {
5686 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
5687 Index2, Index3);
5688 }
5689 break;
5690 CASE_SRCS_IN(0, 1, 0, 0) : {
5691 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5692 Src1, Index1);
5693 T = lowerShuffleVector_TwoFromSameSrc(
5694 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5695 }
5696 break;
5697 CASE_SRCS_IN(0, 1, 0, 1) : {
5698 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
5699 (Index3 - ExpectedNumElements) == 1) {
5700 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5701 auto *Src0R = legalizeToReg(Src0);
5702 T = makeReg(DestTy);
5703 _movp(T, Src0R);
5704 _punpckl(T, Src1RM);
5705 assert(false);
Jim Stichnoth 2016/04/25 21:23:45 ??? Is something not getting tested that ought to
John 2016/04/25 22:38:39 Discussed offline. For posterity: none of the CAS
5706 } else if (Index0 == Index2 && Index1 == Index3) {
5707 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5708 Src0, Index0, Src1, Index1);
5709 T = lowerShuffleVector_AllFromSameSrc(
5710 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5711 UNIFIED_INDEX_1);
5712 } else {
5713 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5714 Src0, Index0, Src1, Index1);
5715 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5716 Src0, Index2, Src1, Index3);
5717 T = lowerShuffleVector_TwoFromSameSrc(
5718 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5719 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5720 }
5721 }
5722 break;
5723 CASE_SRCS_IN(0, 1, 1, 0) : {
5724 if (Index0 == Index3 && Index1 == Index2) {
5725 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5726 Src0, Index0, Src1, Index1);
5727 T = lowerShuffleVector_AllFromSameSrc(
5728 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5729 UNIFIED_INDEX_0);
5730 } else {
5731 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5732 Src0, Index0, Src1, Index1);
5733 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5734 Src1, Index2, Src0, Index3);
5735 T = lowerShuffleVector_TwoFromSameSrc(
5736 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5737 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5738 }
5739 }
5740 break;
5741 CASE_SRCS_IN(0, 1, 1, 1) : {
5742 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5743 Src1, Index1);
5744 T = lowerShuffleVector_TwoFromSameSrc(
5745 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5746 }
5747 break;
5748 CASE_SRCS_IN(1, 0, 0, 0) : {
5749 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5750 Src0, Index1);
5751 T = lowerShuffleVector_TwoFromSameSrc(
5752 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5753 }
5754 break;
5755 CASE_SRCS_IN(1, 0, 0, 1) : {
5756 if (Index0 == Index3 && Index1 == Index2) {
5757 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5758 Src1, Index0, Src0, Index1);
5759 T = lowerShuffleVector_AllFromSameSrc(
5760 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5761 UNIFIED_INDEX_0);
5762 } else {
5763 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5764 Src1, Index0, Src0, Index1);
5765 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5766 Src0, Index2, Src1, Index3);
5767 T = lowerShuffleVector_TwoFromSameSrc(
5768 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5769 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5770 }
5771 }
5772 break;
5773 CASE_SRCS_IN(1, 0, 1, 0) : {
5774 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
5775 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
5776 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
5777 auto *Src0R = legalizeToReg(Src1);
5778 T = makeReg(DestTy);
5779 _movp(T, Src0R);
5780 _punpckl(T, Src1RM);
5781 } else if (Index0 == Index2 && Index1 == Index3) {
5782 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5783 Src1, Index0, Src0, Index1);
5784 T = lowerShuffleVector_AllFromSameSrc(
5785 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5786 UNIFIED_INDEX_1);
5787 } else {
5788 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5789 Src1, Index0, Src0, Index1);
5790 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5791 Src1, Index2, Src0, Index3);
5792 T = lowerShuffleVector_TwoFromSameSrc(
5793 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5794 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5795 }
5796 }
5797 break;
5798 CASE_SRCS_IN(1, 0, 1, 1) : {
5799 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5800 Src0, Index1);
5801 T = lowerShuffleVector_TwoFromSameSrc(
5802 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5803 }
5804 break;
5805 CASE_SRCS_IN(1, 1, 0, 0) : {
5806 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
5807 Index2, Index3);
5808 }
5809 break;
5810 CASE_SRCS_IN(1, 1, 0, 1) : {
5811 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5812 Src1, Index3);
5813 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5814 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5815 }
5816 break;
5817 CASE_SRCS_IN(1, 1, 1, 0) : {
5818 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5819 Src0, Index3);
5820 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5821 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5822 }
5823 break;
5824 CASE_SRCS_IN(1, 1, 1, 1) : {
5825 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
5826 Index3);
5827 }
5828 break;
5829 #undef CASE_SRCS_IN
5830 }
5831
5832 assert(T != nullptr);
5833 assert(T->getType() == DestTy);
5834 _movp(Dest, T);
5835 return;
5836 } break;
5585 } 5837 }
5586 5838
5587 // Unoptimized shuffle. Perform a series of inserts and extracts. 5839 // Unoptimized shuffle. Perform a series of inserts and extracts.
5588 Context.insert<InstFakeDef>(T); 5840 Context.insert<InstFakeDef>(T);
5589 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5590 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5591 const SizeT NumElements = typeNumElements(DestTy);
5592 const Type ElementType = typeElementType(DestTy); 5841 const Type ElementType = typeElementType(DestTy);
5593 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { 5842 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5594 auto *Index = Instr->getIndex(I); 5843 auto *Index = Instr->getIndex(I);
5595 const SizeT Elem = Index->getValue(); 5844 const SizeT Elem = Index->getValue();
5596 auto *ExtElmt = makeReg(ElementType); 5845 auto *ExtElmt = makeReg(ElementType);
5597 if (Elem < NumElements) { 5846 if (Elem < NumElements) {
5598 lowerExtractElement( 5847 lowerExtractElement(
5599 InstExtractElement::create(Func, ExtElmt, Src0, Index)); 5848 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5600 } else { 5849 } else {
5601 lowerExtractElement(InstExtractElement::create( 5850 lowerExtractElement(InstExtractElement::create(
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after
7480 emitGlobal(*Var, SectionSuffix); 7729 emitGlobal(*Var, SectionSuffix);
7481 } 7730 }
7482 } 7731 }
7483 } break; 7732 } break;
7484 } 7733 }
7485 } 7734 }
7486 } // end of namespace X86NAMESPACE 7735 } // end of namespace X86NAMESPACE
7487 } // end of namespace Ice 7736 } // end of namespace Ice
7488 7737
7489 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7738 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698