Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(355)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1909013002: Subzero. X86. Lowers shufflevector using xmm instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/vector-shuffle.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 5592 matching lines...) Expand 10 before | Expand all | Expand 10 after
5603 } 5603 }
5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5605 // explicitly looks for a ret instruction as a marker for where to insert the 5605 // explicitly looks for a ret instruction as a marker for where to insert the
5606 // frame removal instructions. 5606 // frame removal instructions.
5607 _ret(Reg); 5607 _ret(Reg);
5608 // Add a fake use of esp to make sure esp stays alive for the entire 5608 // Add a fake use of esp to make sure esp stays alive for the entire
5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5610 keepEspLiveAtExit(); 5610 keepEspLiveAtExit();
5611 } 5611 }
5612 5612
5613 inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
5614 SizeT Index3) {
5615 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) |
5616 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6);
5617 assert(Mask < 256);
5618 return Mask;
5619 }
5620
5621 template <typename TraitsType>
5622 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
5623 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
5624 constexpr SizeT SrcBit = 1 << 2;
5625 assert((Index0 & SrcBit) == (Index1 & SrcBit));
5626 assert((Index0 & SrcBit) == (Index2 & SrcBit));
5627 assert((Index0 & SrcBit) == (Index3 & SrcBit));
5628 (void)SrcBit;
5629
5630 const Type SrcTy = Src->getType();
5631 auto *T = makeReg(SrcTy);
5632 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
5633 auto *Mask =
5634 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5635 _pshufd(T, SrcRM, Mask);
5636 return T;
5637 }
5638
5639 template <typename TraitsType>
5640 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
5641 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
5642 SizeT Index3) {
5643 constexpr SizeT SrcBit = 1 << 2;
5644 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
5645 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
5646 (void)SrcBit;
5647
5648 const Type SrcTy = Src0->getType();
5649 assert(Src1->getType() == SrcTy);
5650 auto *T = makeReg(SrcTy);
5651 auto *Src0R = legalizeToReg(Src0);
5652 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5653 auto *Mask =
5654 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5655 _movp(T, Src0R);
5656 _shufps(T, Src1RM, Mask);
5657 return T;
5658 }
5659
5660 template <typename TraitsType>
5661 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
5662 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
5663 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
5664 Index1, IGNORE_INDEX);
5665 }
5666
5667 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
5668 SizeT Index3) {
5669 constexpr SizeT SrcBit = 1 << 2;
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5675 }
5676
5613 template <typename TraitsType> 5677 template <typename TraitsType>
5614 void TargetX86Base<TraitsType>::lowerShuffleVector( 5678 void TargetX86Base<TraitsType>::lowerShuffleVector(
5615 const InstShuffleVector *Instr) { 5679 const InstShuffleVector *Instr) {
5616 auto *Dest = Instr->getDest(); 5680 auto *Dest = Instr->getDest();
5617 const Type DestTy = Dest->getType(); 5681 const Type DestTy = Dest->getType();
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5684 const SizeT NumElements = typeNumElements(DestTy);
5618 5685
5619 auto *T = makeReg(DestTy); 5686 auto *T = makeReg(DestTy);
5620 5687
5621 switch (DestTy) { 5688 switch (DestTy) {
5622 default: 5689 default:
5623 break; 5690 break;
5624 // TODO(jpp): figure out how to properly lower this without scalarization. 5691 // TODO(jpp): figure out how to properly lower the remaining cases without
5692 // scalarization.
5693 case IceType_v4i1:
5694 case IceType_v4i32:
5695 case IceType_v4f32: {
5696 static constexpr SizeT ExpectedNumElements = 4;
5697 assert(ExpectedNumElements == Instr->getNumIndexes());
5698 const SizeT Index0 = Instr->getIndex(0)->getValue();
5699 const SizeT Index1 = Instr->getIndex(1)->getValue();
5700 const SizeT Index2 = Instr->getIndex(2)->getValue();
5701 const SizeT Index3 = Instr->getIndex(3)->getValue();
5702 Variable *T = nullptr;
5703 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
5704 #define CASE_SRCS_IN(S0, S1, S2, S3) \
5705 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
5706 CASE_SRCS_IN(0, 0, 0, 0) : {
5707 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
5708 Index3);
5709 }
5710 break;
5711 CASE_SRCS_IN(0, 0, 0, 1) : {
5712 assert(false);
Jim Stichnoth 2016/04/25 23:09:03 Please change all these to something like: assert
John 2016/04/26 11:14:13 Done.
5713 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5714 Src1, Index3);
5715 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5716 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5717 }
5718 break;
5719 CASE_SRCS_IN(0, 0, 1, 0) : {
5720 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5721 Src0, Index3);
5722 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5723 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5724 }
5725 break;
5726 CASE_SRCS_IN(0, 0, 1, 1) : {
5727 assert(false);
5728 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
5729 Index2, Index3);
5730 }
5731 break;
5732 CASE_SRCS_IN(0, 1, 0, 0) : {
5733 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5734 Src1, Index1);
5735 T = lowerShuffleVector_TwoFromSameSrc(
5736 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5737 }
5738 break;
5739 CASE_SRCS_IN(0, 1, 0, 1) : {
5740 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
5741 (Index3 - ExpectedNumElements) == 1) {
5742 assert(false);
5743 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5744 auto *Src0R = legalizeToReg(Src0);
5745 T = makeReg(DestTy);
5746 _movp(T, Src0R);
5747 _punpckl(T, Src1RM);
5748 } else if (Index0 == Index2 && Index1 == Index3) {
5749 assert(false);
5750 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5751 Src0, Index0, Src1, Index1);
5752 T = lowerShuffleVector_AllFromSameSrc(
5753 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5754 UNIFIED_INDEX_1);
5755 } else {
5756 assert(false);
5757 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5758 Src0, Index0, Src1, Index1);
5759 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5760 Src0, Index2, Src1, Index3);
5761 T = lowerShuffleVector_TwoFromSameSrc(
5762 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5763 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5764 }
5765 }
5766 break;
5767 CASE_SRCS_IN(0, 1, 1, 0) : {
5768 if (Index0 == Index3 && Index1 == Index2) {
5769 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5770 Src0, Index0, Src1, Index1);
5771 T = lowerShuffleVector_AllFromSameSrc(
5772 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5773 UNIFIED_INDEX_0);
5774 } else {
5775 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5776 Src0, Index0, Src1, Index1);
5777 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5778 Src1, Index2, Src0, Index3);
5779 T = lowerShuffleVector_TwoFromSameSrc(
5780 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5781 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5782 }
5783 }
5784 break;
5785 CASE_SRCS_IN(0, 1, 1, 1) : {
5786 assert(false);
5787 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5788 Src1, Index1);
5789 T = lowerShuffleVector_TwoFromSameSrc(
5790 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5791 }
5792 break;
5793 CASE_SRCS_IN(1, 0, 0, 0) : {
5794 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5795 Src0, Index1);
5796 T = lowerShuffleVector_TwoFromSameSrc(
5797 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5798 }
5799 break;
5800 CASE_SRCS_IN(1, 0, 0, 1) : {
5801 if (Index0 == Index3 && Index1 == Index2) {
5802 assert(false);
5803 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5804 Src1, Index0, Src0, Index1);
5805 T = lowerShuffleVector_AllFromSameSrc(
5806 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5807 UNIFIED_INDEX_0);
5808 } else {
5809 assert(false);
5810 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5811 Src1, Index0, Src0, Index1);
5812 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5813 Src0, Index2, Src1, Index3);
5814 T = lowerShuffleVector_TwoFromSameSrc(
5815 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5816 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5817 }
5818 }
5819 break;
5820 CASE_SRCS_IN(1, 0, 1, 0) : {
5821 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
5822 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
5823 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
5824 auto *Src0R = legalizeToReg(Src1);
5825 T = makeReg(DestTy);
5826 _movp(T, Src0R);
5827 _punpckl(T, Src1RM);
5828 } else if (Index0 == Index2 && Index1 == Index3) {
5829 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5830 Src1, Index0, Src0, Index1);
5831 T = lowerShuffleVector_AllFromSameSrc(
5832 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5833 UNIFIED_INDEX_1);
5834 } else {
5835 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5836 Src1, Index0, Src0, Index1);
5837 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5838 Src1, Index2, Src0, Index3);
5839 T = lowerShuffleVector_TwoFromSameSrc(
5840 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5841 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5842 }
5843 }
5844 break;
5845 CASE_SRCS_IN(1, 0, 1, 1) : {
5846 assert(false);
5847 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5848 Src0, Index1);
5849 T = lowerShuffleVector_TwoFromSameSrc(
5850 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5851 }
5852 break;
5853 CASE_SRCS_IN(1, 1, 0, 0) : {
5854 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
5855 Index2, Index3);
5856 }
5857 break;
5858 CASE_SRCS_IN(1, 1, 0, 1) : {
5859 assert(false);
5860 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5861 Src1, Index3);
5862 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5863 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5864 }
5865 break;
5866 CASE_SRCS_IN(1, 1, 1, 0) : {
5867 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5868 Src0, Index3);
5869 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5870 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5871 }
5872 break;
5873 CASE_SRCS_IN(1, 1, 1, 1) : {
5874 assert(false);
5875 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
5876 Index3);
5877 }
5878 break;
5879 #undef CASE_SRCS_IN
5880 }
5881
5882 assert(T != nullptr);
5883 assert(T->getType() == DestTy);
5884 _movp(Dest, T);
5885 return;
5886 } break;
5625 } 5887 }
5626 5888
5627 // Unoptimized shuffle. Perform a series of inserts and extracts. 5889 // Unoptimized shuffle. Perform a series of inserts and extracts.
5628 Context.insert<InstFakeDef>(T); 5890 Context.insert<InstFakeDef>(T);
5629 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5630 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5631 const SizeT NumElements = typeNumElements(DestTy);
5632 const Type ElementType = typeElementType(DestTy); 5891 const Type ElementType = typeElementType(DestTy);
5633 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { 5892 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5634 auto *Index = Instr->getIndex(I); 5893 auto *Index = Instr->getIndex(I);
5635 const SizeT Elem = Index->getValue(); 5894 const SizeT Elem = Index->getValue();
5636 auto *ExtElmt = makeReg(ElementType); 5895 auto *ExtElmt = makeReg(ElementType);
5637 if (Elem < NumElements) { 5896 if (Elem < NumElements) {
5638 lowerExtractElement( 5897 lowerExtractElement(
5639 InstExtractElement::create(Func, ExtElmt, Src0, Index)); 5898 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5640 } else { 5899 } else {
5641 lowerExtractElement(InstExtractElement::create( 5900 lowerExtractElement(InstExtractElement::create(
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after
7520 emitGlobal(*Var, SectionSuffix); 7779 emitGlobal(*Var, SectionSuffix);
7521 } 7780 }
7522 } 7781 }
7523 } break; 7782 } break;
7524 } 7783 }
7525 } 7784 }
7526 } // end of namespace X86NAMESPACE 7785 } // end of namespace X86NAMESPACE
7527 } // end of namespace Ice 7786 } // end of namespace Ice
7528 7787
7529 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7788 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/vector-shuffle.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698