Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(368)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1909013002: Subzero. X86. Lowers shufflevector using xmm instructions. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/vector-shuffle.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 5592 matching lines...) Expand 10 before | Expand all | Expand 10 after
5603 } 5603 }
5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5605 // explicitly looks for a ret instruction as a marker for where to insert the 5605 // explicitly looks for a ret instruction as a marker for where to insert the
5606 // frame removal instructions. 5606 // frame removal instructions.
5607 _ret(Reg); 5607 _ret(Reg);
5608 // Add a fake use of esp to make sure esp stays alive for the entire 5608 // Add a fake use of esp to make sure esp stays alive for the entire
5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5610 keepEspLiveAtExit(); 5610 keepEspLiveAtExit();
5611 } 5611 }
5612 5612
5613 inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2,
5614 SizeT Index3) {
5615 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) |
5616 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6);
5617 assert(Mask < 256);
5618 return Mask;
5619 }
5620
5621 template <typename TraitsType>
5622 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc(
5623 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) {
5624 constexpr SizeT SrcBit = 1 << 2;
5625 assert((Index0 & SrcBit) == (Index1 & SrcBit));
5626 assert((Index0 & SrcBit) == (Index2 & SrcBit));
5627 assert((Index0 & SrcBit) == (Index3 & SrcBit));
5628 (void)SrcBit;
5629
5630 const Type SrcTy = Src->getType();
5631 auto *T = makeReg(SrcTy);
5632 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
5633 auto *Mask =
5634 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5635 _pshufd(T, SrcRM, Mask);
5636 return T;
5637 }
5638
5639 template <typename TraitsType>
5640 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc(
5641 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2,
5642 SizeT Index3) {
5643 constexpr SizeT SrcBit = 1 << 2;
5644 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX));
5645 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX));
5646 (void)SrcBit;
5647
5648 const Type SrcTy = Src0->getType();
5649 assert(Src1->getType() == SrcTy);
5650 auto *T = makeReg(SrcTy);
5651 auto *Src0R = legalizeToReg(Src0);
5652 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5653 auto *Mask =
5654 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3));
5655 _movp(T, Src0R);
5656 _shufps(T, Src1RM, Mask);
5657 return T;
5658 }
5659
5660 template <typename TraitsType>
5661 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs(
5662 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) {
5663 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1,
5664 Index1, IGNORE_INDEX);
5665 }
5666
5667 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2,
5668 SizeT Index3) {
5669 constexpr SizeT SrcBit = 1 << 2;
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5675 }
5676
5613 template <typename TraitsType> 5677 template <typename TraitsType>
5614 void TargetX86Base<TraitsType>::lowerShuffleVector( 5678 void TargetX86Base<TraitsType>::lowerShuffleVector(
5615 const InstShuffleVector *Instr) { 5679 const InstShuffleVector *Instr) {
5616 auto *Dest = Instr->getDest(); 5680 auto *Dest = Instr->getDest();
5617 const Type DestTy = Dest->getType(); 5681 const Type DestTy = Dest->getType();
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5684 const SizeT NumElements = typeNumElements(DestTy);
5618 5685
5619 auto *T = makeReg(DestTy); 5686 auto *T = makeReg(DestTy);
5620 5687
5621 switch (DestTy) { 5688 switch (DestTy) {
5622 default: 5689 default:
5623 break; 5690 break;
5624 // TODO(jpp): figure out how to properly lower this without scalarization. 5691 // TODO(jpp): figure out how to properly lower the remaining cases without
5692 // scalarization.
5693 case IceType_v4i1:
5694 case IceType_v4i32:
5695 case IceType_v4f32: {
5696 static constexpr SizeT ExpectedNumElements = 4;
5697 assert(ExpectedNumElements == Instr->getNumIndexes());
5698 const SizeT Index0 = Instr->getIndex(0)->getValue();
5699 const SizeT Index1 = Instr->getIndex(1)->getValue();
5700 const SizeT Index2 = Instr->getIndex(2)->getValue();
5701 const SizeT Index3 = Instr->getIndex(3)->getValue();
5702 Variable *T = nullptr;
5703 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) {
5704 #define CASE_SRCS_IN(S0, S1, S2, S3) \
5705 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3))
5706 CASE_SRCS_IN(0, 0, 0, 0) : {
5707 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2,
5708 Index3);
5709 }
5710 break;
5711 CASE_SRCS_IN(0, 0, 0, 1) : {
5712 assert(false && "Following code is untested but likely correct; test "
5713 "and remove assert.");
5714 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5715 Src1, Index3);
5716 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5717 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5718 }
5719 break;
5720 CASE_SRCS_IN(0, 0, 1, 0) : {
5721 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5722 Src0, Index3);
5723 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified,
5724 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5725 }
5726 break;
5727 CASE_SRCS_IN(0, 0, 1, 1) : {
5728 assert(false && "Following code is untested but likely correct; test "
5729 "and remove assert.");
5730 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1,
5731 Index2, Index3);
5732 }
5733 break;
5734 CASE_SRCS_IN(0, 1, 0, 0) : {
5735 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5736 Src1, Index1);
5737 T = lowerShuffleVector_TwoFromSameSrc(
5738 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5739 }
5740 break;
5741 CASE_SRCS_IN(0, 1, 0, 1) : {
5742 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 &&
5743 (Index3 - ExpectedNumElements) == 1) {
5744 assert(false && "Following code is untested but likely correct; test "
5745 "and remove assert.");
5746 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5747 auto *Src0R = legalizeToReg(Src0);
5748 T = makeReg(DestTy);
5749 _movp(T, Src0R);
5750 _punpckl(T, Src1RM);
5751 } else if (Index0 == Index2 && Index1 == Index3) {
5752 assert(false && "Following code is untested but likely correct; test "
5753 "and remove assert.");
5754 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5755 Src0, Index0, Src1, Index1);
5756 T = lowerShuffleVector_AllFromSameSrc(
5757 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5758 UNIFIED_INDEX_1);
5759 } else {
5760 assert(false && "Following code is untested but likely correct; test "
5761 "and remove assert.");
5762 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5763 Src0, Index0, Src1, Index1);
5764 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5765 Src0, Index2, Src1, Index3);
5766 T = lowerShuffleVector_TwoFromSameSrc(
5767 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5768 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5769 }
5770 }
5771 break;
5772 CASE_SRCS_IN(0, 1, 1, 0) : {
5773 if (Index0 == Index3 && Index1 == Index2) {
5774 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5775 Src0, Index0, Src1, Index1);
5776 T = lowerShuffleVector_AllFromSameSrc(
5777 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5778 UNIFIED_INDEX_0);
5779 } else {
5780 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5781 Src0, Index0, Src1, Index1);
5782 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5783 Src1, Index2, Src0, Index3);
5784 T = lowerShuffleVector_TwoFromSameSrc(
5785 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5786 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5787 }
5788 }
5789 break;
5790 CASE_SRCS_IN(0, 1, 1, 1) : {
5791 assert(false && "Following code is untested but likely correct; test "
5792 "and remove assert.");
5793 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0,
5794 Src1, Index1);
5795 T = lowerShuffleVector_TwoFromSameSrc(
5796 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5797 }
5798 break;
5799 CASE_SRCS_IN(1, 0, 0, 0) : {
5800 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5801 Src0, Index1);
5802 T = lowerShuffleVector_TwoFromSameSrc(
5803 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3);
5804 }
5805 break;
5806 CASE_SRCS_IN(1, 0, 0, 1) : {
5807 if (Index0 == Index3 && Index1 == Index2) {
5808 assert(false && "Following code is untested but likely correct; test "
5809 "and remove assert.");
5810 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5811 Src1, Index0, Src0, Index1);
5812 T = lowerShuffleVector_AllFromSameSrc(
5813 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1,
5814 UNIFIED_INDEX_0);
5815 } else {
5816 assert(false && "Following code is untested but likely correct; test "
5817 "and remove assert.");
5818 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5819 Src1, Index0, Src0, Index1);
5820 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5821 Src0, Index2, Src1, Index3);
5822 T = lowerShuffleVector_TwoFromSameSrc(
5823 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5824 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5825 }
5826 }
5827 break;
5828 CASE_SRCS_IN(1, 0, 1, 0) : {
5829 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 &&
5830 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) {
5831 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem);
5832 auto *Src0R = legalizeToReg(Src1);
5833 T = makeReg(DestTy);
5834 _movp(T, Src0R);
5835 _punpckl(T, Src1RM);
5836 } else if (Index0 == Index2 && Index1 == Index3) {
5837 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(
5838 Src1, Index0, Src0, Index1);
5839 T = lowerShuffleVector_AllFromSameSrc(
5840 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0,
5841 UNIFIED_INDEX_1);
5842 } else {
5843 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs(
5844 Src1, Index0, Src0, Index1);
5845 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs(
5846 Src1, Index2, Src0, Index3);
5847 T = lowerShuffleVector_TwoFromSameSrc(
5848 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1,
5849 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5850 }
5851 }
5852 break;
5853 CASE_SRCS_IN(1, 0, 1, 1) : {
5854 assert(false && "Following code is untested but likely correct; test "
5855 "and remove assert.");
5856 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0,
5857 Src0, Index1);
5858 T = lowerShuffleVector_TwoFromSameSrc(
5859 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3);
5860 }
5861 break;
5862 CASE_SRCS_IN(1, 1, 0, 0) : {
5863 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0,
5864 Index2, Index3);
5865 }
5866 break;
5867 CASE_SRCS_IN(1, 1, 0, 1) : {
5868 assert(false && "Following code is untested but likely correct; test "
5869 "and remove assert.");
5870 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2,
5871 Src1, Index3);
5872 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5873 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5874 }
5875 break;
5876 CASE_SRCS_IN(1, 1, 1, 0) : {
5877 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2,
5878 Src0, Index3);
5879 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified,
5880 UNIFIED_INDEX_0, UNIFIED_INDEX_1);
5881 }
5882 break;
5883 CASE_SRCS_IN(1, 1, 1, 1) : {
5884 assert(false && "Following code is untested but likely correct; test "
5885 "and remove assert.");
5886 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2,
5887 Index3);
5888 }
5889 break;
5890 #undef CASE_SRCS_IN
5891 }
5892
5893 assert(T != nullptr);
5894 assert(T->getType() == DestTy);
5895 _movp(Dest, T);
5896 return;
5897 } break;
5625 } 5898 }
5626 5899
5627 // Unoptimized shuffle. Perform a series of inserts and extracts. 5900 // Unoptimized shuffle. Perform a series of inserts and extracts.
5628 Context.insert<InstFakeDef>(T); 5901 Context.insert<InstFakeDef>(T);
5629 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5630 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5631 const SizeT NumElements = typeNumElements(DestTy);
5632 const Type ElementType = typeElementType(DestTy); 5902 const Type ElementType = typeElementType(DestTy);
5633 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { 5903 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
5634 auto *Index = Instr->getIndex(I); 5904 auto *Index = Instr->getIndex(I);
5635 const SizeT Elem = Index->getValue(); 5905 const SizeT Elem = Index->getValue();
5636 auto *ExtElmt = makeReg(ElementType); 5906 auto *ExtElmt = makeReg(ElementType);
5637 if (Elem < NumElements) { 5907 if (Elem < NumElements) {
5638 lowerExtractElement( 5908 lowerExtractElement(
5639 InstExtractElement::create(Func, ExtElmt, Src0, Index)); 5909 InstExtractElement::create(Func, ExtElmt, Src0, Index));
5640 } else { 5910 } else {
5641 lowerExtractElement(InstExtractElement::create( 5911 lowerExtractElement(InstExtractElement::create(
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after
7520 emitGlobal(*Var, SectionSuffix); 7790 emitGlobal(*Var, SectionSuffix);
7521 } 7791 }
7522 } 7792 }
7523 } break; 7793 } break;
7524 } 7794 }
7525 } 7795 }
7526 } // end of namespace X86NAMESPACE 7796 } // end of namespace X86NAMESPACE
7527 } // end of namespace Ice 7797 } // end of namespace Ice
7528 7798
7529 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/vector-shuffle.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698