OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 5592 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5603 } | 5603 } |
5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
5605 // explicitly looks for a ret instruction as a marker for where to insert the | 5605 // explicitly looks for a ret instruction as a marker for where to insert the |
5606 // frame removal instructions. | 5606 // frame removal instructions. |
5607 _ret(Reg); | 5607 _ret(Reg); |
5608 // Add a fake use of esp to make sure esp stays alive for the entire | 5608 // Add a fake use of esp to make sure esp stays alive for the entire |
5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
5610 keepEspLiveAtExit(); | 5610 keepEspLiveAtExit(); |
5611 } | 5611 } |
5612 | 5612 |
5613 inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
5614 SizeT Index3) { | |
5615 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | | |
5616 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); | |
5617 assert(Mask < 256); | |
5618 return Mask; | |
5619 } | |
5620 | |
5621 template <typename TraitsType> | |
5622 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( | |
5623 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { | |
5624 constexpr SizeT SrcBit = 1 << 2; | |
5625 assert((Index0 & SrcBit) == (Index1 & SrcBit)); | |
5626 assert((Index0 & SrcBit) == (Index2 & SrcBit)); | |
5627 assert((Index0 & SrcBit) == (Index3 & SrcBit)); | |
5628 (void)SrcBit; | |
5629 | |
5630 const Type SrcTy = Src->getType(); | |
5631 auto *T = makeReg(SrcTy); | |
5632 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); | |
5633 auto *Mask = | |
5634 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
5635 _pshufd(T, SrcRM, Mask); | |
5636 return T; | |
5637 } | |
5638 | |
5639 template <typename TraitsType> | |
5640 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( | |
5641 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, | |
5642 SizeT Index3) { | |
5643 constexpr SizeT SrcBit = 1 << 2; | |
5644 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); | |
5645 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); | |
5646 (void)SrcBit; | |
5647 | |
5648 const Type SrcTy = Src0->getType(); | |
5649 assert(Src1->getType() == SrcTy); | |
5650 auto *T = makeReg(SrcTy); | |
5651 auto *Src0R = legalizeToReg(Src0); | |
5652 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
5653 auto *Mask = | |
5654 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
5655 _movp(T, Src0R); | |
5656 _shufps(T, Src1RM, Mask); | |
5657 return T; | |
5658 } | |
5659 | |
5660 template <typename TraitsType> | |
5661 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( | |
5662 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { | |
5663 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, | |
5664 Index1, IGNORE_INDEX); | |
5665 } | |
5666 | |
5667 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
5668 SizeT Index3) { | |
5669 constexpr SizeT SrcBit = 1 << 2; | |
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | |
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | |
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | |
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | |
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | |
5675 } | |
5676 | |
5613 template <typename TraitsType> | 5677 template <typename TraitsType> |
5614 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5678 void TargetX86Base<TraitsType>::lowerShuffleVector( |
5615 const InstShuffleVector *Instr) { | 5679 const InstShuffleVector *Instr) { |
5616 auto *Dest = Instr->getDest(); | 5680 auto *Dest = Instr->getDest(); |
5617 const Type DestTy = Dest->getType(); | 5681 const Type DestTy = Dest->getType(); |
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
5684 const SizeT NumElements = typeNumElements(DestTy); | |
5618 | 5685 |
5619 auto *T = makeReg(DestTy); | 5686 auto *T = makeReg(DestTy); |
5620 | 5687 |
5621 switch (DestTy) { | 5688 switch (DestTy) { |
5622 default: | 5689 default: |
5623 break; | 5690 break; |
5624 // TODO(jpp): figure out how to properly lower this without scalarization. | 5691 // TODO(jpp): figure out how to properly lower the remaining cases without |
5692 // scalarization. | |
5693 case IceType_v4i1: | |
5694 case IceType_v4i32: | |
5695 case IceType_v4f32: { | |
5696 static constexpr SizeT ExpectedNumElements = 4; | |
5697 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
5702 Variable *T = nullptr; | |
5703 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { | |
5704 #define CASE_SRCS_IN(S0, S1, S2, S3) \ | |
5705 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) | |
5706 CASE_SRCS_IN(0, 0, 0, 0) : { | |
5707 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, | |
5708 Index3); | |
5709 } | |
5710 break; | |
5711 CASE_SRCS_IN(0, 0, 0, 1) : { | |
5712 assert(false); | |
Jim Stichnoth
2016/04/25 23:09:03
Please change all these to something like:
assert
John
2016/04/26 11:14:13
Done.
| |
5713 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
5714 Src1, Index3); | |
5715 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
5716 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5717 } | |
5718 break; | |
5719 CASE_SRCS_IN(0, 0, 1, 0) : { | |
5720 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
5721 Src0, Index3); | |
5722 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
5723 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5724 } | |
5725 break; | |
5726 CASE_SRCS_IN(0, 0, 1, 1) : { | |
5727 assert(false); | |
5728 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, | |
5729 Index2, Index3); | |
5730 } | |
5731 break; | |
5732 CASE_SRCS_IN(0, 1, 0, 0) : { | |
5733 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
5734 Src1, Index1); | |
5735 T = lowerShuffleVector_TwoFromSameSrc( | |
5736 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
5737 } | |
5738 break; | |
5739 CASE_SRCS_IN(0, 1, 0, 1) : { | |
5740 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && | |
5741 (Index3 - ExpectedNumElements) == 1) { | |
5742 assert(false); | |
5743 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
5744 auto *Src0R = legalizeToReg(Src0); | |
5745 T = makeReg(DestTy); | |
5746 _movp(T, Src0R); | |
5747 _punpckl(T, Src1RM); | |
5748 } else if (Index0 == Index2 && Index1 == Index3) { | |
5749 assert(false); | |
5750 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5751 Src0, Index0, Src1, Index1); | |
5752 T = lowerShuffleVector_AllFromSameSrc( | |
5753 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
5754 UNIFIED_INDEX_1); | |
5755 } else { | |
5756 assert(false); | |
5757 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5758 Src0, Index0, Src1, Index1); | |
5759 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5760 Src0, Index2, Src1, Index3); | |
5761 T = lowerShuffleVector_TwoFromSameSrc( | |
5762 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5763 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5764 } | |
5765 } | |
5766 break; | |
5767 CASE_SRCS_IN(0, 1, 1, 0) : { | |
5768 if (Index0 == Index3 && Index1 == Index2) { | |
5769 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5770 Src0, Index0, Src1, Index1); | |
5771 T = lowerShuffleVector_AllFromSameSrc( | |
5772 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
5773 UNIFIED_INDEX_0); | |
5774 } else { | |
5775 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5776 Src0, Index0, Src1, Index1); | |
5777 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5778 Src1, Index2, Src0, Index3); | |
5779 T = lowerShuffleVector_TwoFromSameSrc( | |
5780 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5781 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5782 } | |
5783 } | |
5784 break; | |
5785 CASE_SRCS_IN(0, 1, 1, 1) : { | |
5786 assert(false); | |
5787 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
5788 Src1, Index1); | |
5789 T = lowerShuffleVector_TwoFromSameSrc( | |
5790 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
5791 } | |
5792 break; | |
5793 CASE_SRCS_IN(1, 0, 0, 0) : { | |
5794 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
5795 Src0, Index1); | |
5796 T = lowerShuffleVector_TwoFromSameSrc( | |
5797 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
5798 } | |
5799 break; | |
5800 CASE_SRCS_IN(1, 0, 0, 1) : { | |
5801 if (Index0 == Index3 && Index1 == Index2) { | |
5802 assert(false); | |
5803 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5804 Src1, Index0, Src0, Index1); | |
5805 T = lowerShuffleVector_AllFromSameSrc( | |
5806 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
5807 UNIFIED_INDEX_0); | |
5808 } else { | |
5809 assert(false); | |
5810 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5811 Src1, Index0, Src0, Index1); | |
5812 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5813 Src0, Index2, Src1, Index3); | |
5814 T = lowerShuffleVector_TwoFromSameSrc( | |
5815 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5816 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5817 } | |
5818 } | |
5819 break; | |
5820 CASE_SRCS_IN(1, 0, 1, 0) : { | |
5821 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && | |
5822 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { | |
5823 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
5824 auto *Src0R = legalizeToReg(Src1); | |
5825 T = makeReg(DestTy); | |
5826 _movp(T, Src0R); | |
5827 _punpckl(T, Src1RM); | |
5828 } else if (Index0 == Index2 && Index1 == Index3) { | |
5829 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5830 Src1, Index0, Src0, Index1); | |
5831 T = lowerShuffleVector_AllFromSameSrc( | |
5832 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
5833 UNIFIED_INDEX_1); | |
5834 } else { | |
5835 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5836 Src1, Index0, Src0, Index1); | |
5837 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5838 Src1, Index2, Src0, Index3); | |
5839 T = lowerShuffleVector_TwoFromSameSrc( | |
5840 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5841 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5842 } | |
5843 } | |
5844 break; | |
5845 CASE_SRCS_IN(1, 0, 1, 1) : { | |
5846 assert(false); | |
5847 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
5848 Src0, Index1); | |
5849 T = lowerShuffleVector_TwoFromSameSrc( | |
5850 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
5851 } | |
5852 break; | |
5853 CASE_SRCS_IN(1, 1, 0, 0) : { | |
5854 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, | |
5855 Index2, Index3); | |
5856 } | |
5857 break; | |
5858 CASE_SRCS_IN(1, 1, 0, 1) : { | |
5859 assert(false); | |
5860 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
5861 Src1, Index3); | |
5862 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
5863 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5864 } | |
5865 break; | |
5866 CASE_SRCS_IN(1, 1, 1, 0) : { | |
5867 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
5868 Src0, Index3); | |
5869 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
5870 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5871 } | |
5872 break; | |
5873 CASE_SRCS_IN(1, 1, 1, 1) : { | |
5874 assert(false); | |
5875 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, | |
5876 Index3); | |
5877 } | |
5878 break; | |
5879 #undef CASE_SRCS_IN | |
5880 } | |
5881 | |
5882 assert(T != nullptr); | |
5883 assert(T->getType() == DestTy); | |
5884 _movp(Dest, T); | |
5885 return; | |
5886 } break; | |
5625 } | 5887 } |
5626 | 5888 |
5627 // Unoptimized shuffle. Perform a series of inserts and extracts. | 5889 // Unoptimized shuffle. Perform a series of inserts and extracts. |
5628 Context.insert<InstFakeDef>(T); | 5890 Context.insert<InstFakeDef>(T); |
5629 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
5630 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
5631 const SizeT NumElements = typeNumElements(DestTy); | |
5632 const Type ElementType = typeElementType(DestTy); | 5891 const Type ElementType = typeElementType(DestTy); |
5633 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 5892 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
5634 auto *Index = Instr->getIndex(I); | 5893 auto *Index = Instr->getIndex(I); |
5635 const SizeT Elem = Index->getValue(); | 5894 const SizeT Elem = Index->getValue(); |
5636 auto *ExtElmt = makeReg(ElementType); | 5895 auto *ExtElmt = makeReg(ElementType); |
5637 if (Elem < NumElements) { | 5896 if (Elem < NumElements) { |
5638 lowerExtractElement( | 5897 lowerExtractElement( |
5639 InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 5898 InstExtractElement::create(Func, ExtElmt, Src0, Index)); |
5640 } else { | 5899 } else { |
5641 lowerExtractElement(InstExtractElement::create( | 5900 lowerExtractElement(InstExtractElement::create( |
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
7520 emitGlobal(*Var, SectionSuffix); | 7779 emitGlobal(*Var, SectionSuffix); |
7521 } | 7780 } |
7522 } | 7781 } |
7523 } break; | 7782 } break; |
7524 } | 7783 } |
7525 } | 7784 } |
7526 } // end of namespace X86NAMESPACE | 7785 } // end of namespace X86NAMESPACE |
7527 } // end of namespace Ice | 7786 } // end of namespace Ice |
7528 | 7787 |
7529 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7788 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |