OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 5592 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5603 } | 5603 } |
5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
5605 // explicitly looks for a ret instruction as a marker for where to insert the | 5605 // explicitly looks for a ret instruction as a marker for where to insert the |
5606 // frame removal instructions. | 5606 // frame removal instructions. |
5607 _ret(Reg); | 5607 _ret(Reg); |
5608 // Add a fake use of esp to make sure esp stays alive for the entire | 5608 // Add a fake use of esp to make sure esp stays alive for the entire |
5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
5610 keepEspLiveAtExit(); | 5610 keepEspLiveAtExit(); |
5611 } | 5611 } |
5612 | 5612 |
| 5613 inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, |
| 5614 SizeT Index3) { |
| 5615 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | |
| 5616 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); |
| 5617 assert(Mask < 256); |
| 5618 return Mask; |
| 5619 } |
| 5620 |
| 5621 template <typename TraitsType> |
| 5622 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( |
| 5623 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { |
| 5624 constexpr SizeT SrcBit = 1 << 2; |
| 5625 assert((Index0 & SrcBit) == (Index1 & SrcBit)); |
| 5626 assert((Index0 & SrcBit) == (Index2 & SrcBit)); |
| 5627 assert((Index0 & SrcBit) == (Index3 & SrcBit)); |
| 5628 (void)SrcBit; |
| 5629 |
| 5630 const Type SrcTy = Src->getType(); |
| 5631 auto *T = makeReg(SrcTy); |
| 5632 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); |
| 5633 auto *Mask = |
| 5634 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); |
| 5635 _pshufd(T, SrcRM, Mask); |
| 5636 return T; |
| 5637 } |
| 5638 |
| 5639 template <typename TraitsType> |
| 5640 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( |
| 5641 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, |
| 5642 SizeT Index3) { |
| 5643 constexpr SizeT SrcBit = 1 << 2; |
| 5644 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); |
| 5645 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); |
| 5646 (void)SrcBit; |
| 5647 |
| 5648 const Type SrcTy = Src0->getType(); |
| 5649 assert(Src1->getType() == SrcTy); |
| 5650 auto *T = makeReg(SrcTy); |
| 5651 auto *Src0R = legalizeToReg(Src0); |
| 5652 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 5653 auto *Mask = |
| 5654 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); |
| 5655 _movp(T, Src0R); |
| 5656 _shufps(T, Src1RM, Mask); |
| 5657 return T; |
| 5658 } |
| 5659 |
| 5660 template <typename TraitsType> |
| 5661 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5662 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { |
| 5663 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, |
| 5664 Index1, IGNORE_INDEX); |
| 5665 } |
| 5666 |
| 5667 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, |
| 5668 SizeT Index3) { |
| 5669 constexpr SizeT SrcBit = 1 << 2; |
| 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); |
| 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); |
| 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); |
| 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); |
| 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; |
| 5675 } |
| 5676 |
5613 template <typename TraitsType> | 5677 template <typename TraitsType> |
5614 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5678 void TargetX86Base<TraitsType>::lowerShuffleVector( |
5615 const InstShuffleVector *Instr) { | 5679 const InstShuffleVector *Instr) { |
5616 auto *Dest = Instr->getDest(); | 5680 auto *Dest = Instr->getDest(); |
5617 const Type DestTy = Dest->getType(); | 5681 const Type DestTy = Dest->getType(); |
| 5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
| 5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
| 5684 const SizeT NumElements = typeNumElements(DestTy); |
5618 | 5685 |
5619 auto *T = makeReg(DestTy); | 5686 auto *T = makeReg(DestTy); |
5620 | 5687 |
5621 switch (DestTy) { | 5688 switch (DestTy) { |
5622 default: | 5689 default: |
5623 break; | 5690 break; |
5624 // TODO(jpp): figure out how to properly lower this without scalarization. | 5691 // TODO(jpp): figure out how to properly lower the remaining cases without |
| 5692 // scalarization. |
| 5693 case IceType_v4i1: |
| 5694 case IceType_v4i32: |
| 5695 case IceType_v4f32: { |
| 5696 static constexpr SizeT ExpectedNumElements = 4; |
| 5697 assert(ExpectedNumElements == Instr->getNumIndexes()); |
| 5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| 5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| 5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| 5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| 5702 Variable *T = nullptr; |
| 5703 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { |
| 5704 #define CASE_SRCS_IN(S0, S1, S2, S3) \ |
| 5705 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) |
| 5706 CASE_SRCS_IN(0, 0, 0, 0) : { |
| 5707 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, |
| 5708 Index3); |
| 5709 } |
| 5710 break; |
| 5711 CASE_SRCS_IN(0, 0, 0, 1) : { |
| 5712 assert(false && "Following code is untested but likely correct; test " |
| 5713 "and remove assert."); |
| 5714 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, |
| 5715 Src1, Index3); |
| 5716 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, |
| 5717 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5718 } |
| 5719 break; |
| 5720 CASE_SRCS_IN(0, 0, 1, 0) : { |
| 5721 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, |
| 5722 Src0, Index3); |
| 5723 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, |
| 5724 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5725 } |
| 5726 break; |
| 5727 CASE_SRCS_IN(0, 0, 1, 1) : { |
| 5728 assert(false && "Following code is untested but likely correct; test " |
| 5729 "and remove assert."); |
| 5730 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, |
| 5731 Index2, Index3); |
| 5732 } |
| 5733 break; |
| 5734 CASE_SRCS_IN(0, 1, 0, 0) : { |
| 5735 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, |
| 5736 Src1, Index1); |
| 5737 T = lowerShuffleVector_TwoFromSameSrc( |
| 5738 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); |
| 5739 } |
| 5740 break; |
| 5741 CASE_SRCS_IN(0, 1, 0, 1) : { |
| 5742 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && |
| 5743 (Index3 - ExpectedNumElements) == 1) { |
| 5744 assert(false && "Following code is untested but likely correct; test " |
| 5745 "and remove assert."); |
| 5746 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 5747 auto *Src0R = legalizeToReg(Src0); |
| 5748 T = makeReg(DestTy); |
| 5749 _movp(T, Src0R); |
| 5750 _punpckl(T, Src1RM); |
| 5751 } else if (Index0 == Index2 && Index1 == Index3) { |
| 5752 assert(false && "Following code is untested but likely correct; test " |
| 5753 "and remove assert."); |
| 5754 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5755 Src0, Index0, Src1, Index1); |
| 5756 T = lowerShuffleVector_AllFromSameSrc( |
| 5757 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, |
| 5758 UNIFIED_INDEX_1); |
| 5759 } else { |
| 5760 assert(false && "Following code is untested but likely correct; test " |
| 5761 "and remove assert."); |
| 5762 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5763 Src0, Index0, Src1, Index1); |
| 5764 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5765 Src0, Index2, Src1, Index3); |
| 5766 T = lowerShuffleVector_TwoFromSameSrc( |
| 5767 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
| 5768 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5769 } |
| 5770 } |
| 5771 break; |
| 5772 CASE_SRCS_IN(0, 1, 1, 0) : { |
| 5773 if (Index0 == Index3 && Index1 == Index2) { |
| 5774 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5775 Src0, Index0, Src1, Index1); |
| 5776 T = lowerShuffleVector_AllFromSameSrc( |
| 5777 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, |
| 5778 UNIFIED_INDEX_0); |
| 5779 } else { |
| 5780 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5781 Src0, Index0, Src1, Index1); |
| 5782 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5783 Src1, Index2, Src0, Index3); |
| 5784 T = lowerShuffleVector_TwoFromSameSrc( |
| 5785 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
| 5786 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5787 } |
| 5788 } |
| 5789 break; |
| 5790 CASE_SRCS_IN(0, 1, 1, 1) : { |
| 5791 assert(false && "Following code is untested but likely correct; test " |
| 5792 "and remove assert."); |
| 5793 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, |
| 5794 Src1, Index1); |
| 5795 T = lowerShuffleVector_TwoFromSameSrc( |
| 5796 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); |
| 5797 } |
| 5798 break; |
| 5799 CASE_SRCS_IN(1, 0, 0, 0) : { |
| 5800 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, |
| 5801 Src0, Index1); |
| 5802 T = lowerShuffleVector_TwoFromSameSrc( |
| 5803 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); |
| 5804 } |
| 5805 break; |
| 5806 CASE_SRCS_IN(1, 0, 0, 1) : { |
| 5807 if (Index0 == Index3 && Index1 == Index2) { |
| 5808 assert(false && "Following code is untested but likely correct; test " |
| 5809 "and remove assert."); |
| 5810 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5811 Src1, Index0, Src0, Index1); |
| 5812 T = lowerShuffleVector_AllFromSameSrc( |
| 5813 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, |
| 5814 UNIFIED_INDEX_0); |
| 5815 } else { |
| 5816 assert(false && "Following code is untested but likely correct; test " |
| 5817 "and remove assert."); |
| 5818 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5819 Src1, Index0, Src0, Index1); |
| 5820 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5821 Src0, Index2, Src1, Index3); |
| 5822 T = lowerShuffleVector_TwoFromSameSrc( |
| 5823 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
| 5824 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5825 } |
| 5826 } |
| 5827 break; |
| 5828 CASE_SRCS_IN(1, 0, 1, 0) : { |
| 5829 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && |
| 5830 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { |
| 5831 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 5832 auto *Src0R = legalizeToReg(Src1); |
| 5833 T = makeReg(DestTy); |
| 5834 _movp(T, Src0R); |
| 5835 _punpckl(T, Src1RM); |
| 5836 } else if (Index0 == Index2 && Index1 == Index3) { |
| 5837 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5838 Src1, Index0, Src0, Index1); |
| 5839 T = lowerShuffleVector_AllFromSameSrc( |
| 5840 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, |
| 5841 UNIFIED_INDEX_1); |
| 5842 } else { |
| 5843 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5844 Src1, Index0, Src0, Index1); |
| 5845 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
| 5846 Src1, Index2, Src0, Index3); |
| 5847 T = lowerShuffleVector_TwoFromSameSrc( |
| 5848 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
| 5849 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5850 } |
| 5851 } |
| 5852 break; |
| 5853 CASE_SRCS_IN(1, 0, 1, 1) : { |
| 5854 assert(false && "Following code is untested but likely correct; test " |
| 5855 "and remove assert."); |
| 5856 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, |
| 5857 Src0, Index1); |
| 5858 T = lowerShuffleVector_TwoFromSameSrc( |
| 5859 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); |
| 5860 } |
| 5861 break; |
| 5862 CASE_SRCS_IN(1, 1, 0, 0) : { |
| 5863 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, |
| 5864 Index2, Index3); |
| 5865 } |
| 5866 break; |
| 5867 CASE_SRCS_IN(1, 1, 0, 1) : { |
| 5868 assert(false && "Following code is untested but likely correct; test " |
| 5869 "and remove assert."); |
| 5870 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, |
| 5871 Src1, Index3); |
| 5872 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, |
| 5873 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5874 } |
| 5875 break; |
| 5876 CASE_SRCS_IN(1, 1, 1, 0) : { |
| 5877 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, |
| 5878 Src0, Index3); |
| 5879 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, |
| 5880 UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
| 5881 } |
| 5882 break; |
| 5883 CASE_SRCS_IN(1, 1, 1, 1) : { |
| 5884 assert(false && "Following code is untested but likely correct; test " |
| 5885 "and remove assert."); |
| 5886 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, |
| 5887 Index3); |
| 5888 } |
| 5889 break; |
| 5890 #undef CASE_SRCS_IN |
| 5891 } |
| 5892 |
| 5893 assert(T != nullptr); |
| 5894 assert(T->getType() == DestTy); |
| 5895 _movp(Dest, T); |
| 5896 return; |
| 5897 } break; |
5625 } | 5898 } |
5626 | 5899 |
5627 // Unoptimized shuffle. Perform a series of inserts and extracts. | 5900 // Unoptimized shuffle. Perform a series of inserts and extracts. |
5628 Context.insert<InstFakeDef>(T); | 5901 Context.insert<InstFakeDef>(T); |
5629 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
5630 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
5631 const SizeT NumElements = typeNumElements(DestTy); | |
5632 const Type ElementType = typeElementType(DestTy); | 5902 const Type ElementType = typeElementType(DestTy); |
5633 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 5903 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
5634 auto *Index = Instr->getIndex(I); | 5904 auto *Index = Instr->getIndex(I); |
5635 const SizeT Elem = Index->getValue(); | 5905 const SizeT Elem = Index->getValue(); |
5636 auto *ExtElmt = makeReg(ElementType); | 5906 auto *ExtElmt = makeReg(ElementType); |
5637 if (Elem < NumElements) { | 5907 if (Elem < NumElements) { |
5638 lowerExtractElement( | 5908 lowerExtractElement( |
5639 InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 5909 InstExtractElement::create(Func, ExtElmt, Src0, Index)); |
5640 } else { | 5910 } else { |
5641 lowerExtractElement(InstExtractElement::create( | 5911 lowerExtractElement(InstExtractElement::create( |
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7520 emitGlobal(*Var, SectionSuffix); | 7790 emitGlobal(*Var, SectionSuffix); |
7521 } | 7791 } |
7522 } | 7792 } |
7523 } break; | 7793 } break; |
7524 } | 7794 } |
7525 } | 7795 } |
7526 } // end of namespace X86NAMESPACE | 7796 } // end of namespace X86NAMESPACE |
7527 } // end of namespace Ice | 7797 } // end of namespace Ice |
7528 | 7798 |
7529 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |