Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 5592 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 5603 } | 5603 } |
| 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 5604 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
| 5605 // explicitly looks for a ret instruction as a marker for where to insert the | 5605 // explicitly looks for a ret instruction as a marker for where to insert the |
| 5606 // frame removal instructions. | 5606 // frame removal instructions. |
| 5607 _ret(Reg); | 5607 _ret(Reg); |
| 5608 // Add a fake use of esp to make sure esp stays alive for the entire | 5608 // Add a fake use of esp to make sure esp stays alive for the entire |
| 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 5609 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
| 5610 keepEspLiveAtExit(); | 5610 keepEspLiveAtExit(); |
| 5611 } | 5611 } |
| 5612 | 5612 |
| 5613 inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
| 5614 SizeT Index3) { | |
| 5615 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | | |
| 5616 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); | |
| 5617 assert(Mask < 256); | |
| 5618 return Mask; | |
| 5619 } | |
| 5620 | |
| 5621 template <typename TraitsType> | |
| 5622 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( | |
| 5623 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { | |
| 5624 constexpr SizeT SrcBit = 1 << 2; | |
| 5625 assert((Index0 & SrcBit) == (Index1 & SrcBit)); | |
| 5626 assert((Index0 & SrcBit) == (Index2 & SrcBit)); | |
| 5627 assert((Index0 & SrcBit) == (Index3 & SrcBit)); | |
| 5628 (void)SrcBit; | |
| 5629 | |
| 5630 const Type SrcTy = Src->getType(); | |
| 5631 auto *T = makeReg(SrcTy); | |
| 5632 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); | |
| 5633 auto *Mask = | |
| 5634 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
| 5635 _pshufd(T, SrcRM, Mask); | |
| 5636 return T; | |
| 5637 } | |
| 5638 | |
| 5639 template <typename TraitsType> | |
| 5640 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( | |
| 5641 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, | |
| 5642 SizeT Index3) { | |
| 5643 constexpr SizeT SrcBit = 1 << 2; | |
| 5644 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); | |
| 5645 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); | |
| 5646 (void)SrcBit; | |
| 5647 | |
| 5648 const Type SrcTy = Src0->getType(); | |
| 5649 assert(Src1->getType() == SrcTy); | |
| 5650 auto *T = makeReg(SrcTy); | |
| 5651 auto *Src0R = legalizeToReg(Src0); | |
| 5652 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 5653 auto *Mask = | |
| 5654 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
| 5655 _movp(T, Src0R); | |
| 5656 _shufps(T, Src1RM, Mask); | |
| 5657 return T; | |
| 5658 } | |
| 5659 | |
| 5660 template <typename TraitsType> | |
| 5661 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5662 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { | |
| 5663 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, | |
| 5664 Index1, IGNORE_INDEX); | |
| 5665 } | |
| 5666 | |
| 5667 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
| 5668 SizeT Index3) { | |
| 5669 constexpr SizeT SrcBit = 1 << 2; | |
| 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | |
| 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | |
| 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | |
| 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | |
| 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | |
| 5675 } | |
| 5676 | |
| 5613 template <typename TraitsType> | 5677 template <typename TraitsType> |
| 5614 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5678 void TargetX86Base<TraitsType>::lowerShuffleVector( |
| 5615 const InstShuffleVector *Instr) { | 5679 const InstShuffleVector *Instr) { |
| 5616 auto *Dest = Instr->getDest(); | 5680 auto *Dest = Instr->getDest(); |
| 5617 const Type DestTy = Dest->getType(); | 5681 const Type DestTy = Dest->getType(); |
| 5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
| 5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
| 5684 const SizeT NumElements = typeNumElements(DestTy); | |
| 5618 | 5685 |
| 5619 auto *T = makeReg(DestTy); | 5686 auto *T = makeReg(DestTy); |
| 5620 | 5687 |
| 5621 switch (DestTy) { | 5688 switch (DestTy) { |
| 5622 default: | 5689 default: |
| 5623 break; | 5690 break; |
| 5624 // TODO(jpp): figure out how to properly lower this without scalarization. | 5691 // TODO(jpp): figure out how to properly lower the remaining cases without |
| 5692 // scalarization. | |
| 5693 case IceType_v4i1: | |
| 5694 case IceType_v4i32: | |
| 5695 case IceType_v4f32: { | |
| 5696 static constexpr SizeT ExpectedNumElements = 4; | |
| 5697 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
| 5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
| 5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
| 5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
| 5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
| 5702 Variable *T = nullptr; | |
| 5703 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { | |
| 5704 #define CASE_SRCS_IN(S0, S1, S2, S3) \ | |
| 5705 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) | |
| 5706 CASE_SRCS_IN(0, 0, 0, 0) : { | |
| 5707 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, | |
| 5708 Index3); | |
| 5709 } | |
| 5710 break; | |
| 5711 CASE_SRCS_IN(0, 0, 0, 1) : { | |
| 5712 assert(false); | |
| **Review comment — Jim Stichnoth** (2016/04/25 23:09:03): Please change all these to something like: assert | |
| **Reply — John** (2016/04/26 11:14:13): Done. | |
| 5713 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
| 5714 Src1, Index3); | |
| 5715 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
| 5716 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5717 } | |
| 5718 break; | |
| 5719 CASE_SRCS_IN(0, 0, 1, 0) : { | |
| 5720 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
| 5721 Src0, Index3); | |
| 5722 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
| 5723 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5724 } | |
| 5725 break; | |
| 5726 CASE_SRCS_IN(0, 0, 1, 1) : { | |
| 5727 assert(false); | |
| 5728 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, | |
| 5729 Index2, Index3); | |
| 5730 } | |
| 5731 break; | |
| 5732 CASE_SRCS_IN(0, 1, 0, 0) : { | |
| 5733 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
| 5734 Src1, Index1); | |
| 5735 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5736 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
| 5737 } | |
| 5738 break; | |
| 5739 CASE_SRCS_IN(0, 1, 0, 1) : { | |
| 5740 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && | |
| 5741 (Index3 - ExpectedNumElements) == 1) { | |
| 5742 assert(false); | |
| 5743 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 5744 auto *Src0R = legalizeToReg(Src0); | |
| 5745 T = makeReg(DestTy); | |
| 5746 _movp(T, Src0R); | |
| 5747 _punpckl(T, Src1RM); | |
| 5748 } else if (Index0 == Index2 && Index1 == Index3) { | |
| 5749 assert(false); | |
| 5750 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5751 Src0, Index0, Src1, Index1); | |
| 5752 T = lowerShuffleVector_AllFromSameSrc( | |
| 5753 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
| 5754 UNIFIED_INDEX_1); | |
| 5755 } else { | |
| 5756 assert(false); | |
| 5757 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5758 Src0, Index0, Src1, Index1); | |
| 5759 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5760 Src0, Index2, Src1, Index3); | |
| 5761 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5762 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5763 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5764 } | |
| 5765 } | |
| 5766 break; | |
| 5767 CASE_SRCS_IN(0, 1, 1, 0) : { | |
| 5768 if (Index0 == Index3 && Index1 == Index2) { | |
| 5769 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5770 Src0, Index0, Src1, Index1); | |
| 5771 T = lowerShuffleVector_AllFromSameSrc( | |
| 5772 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
| 5773 UNIFIED_INDEX_0); | |
| 5774 } else { | |
| 5775 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5776 Src0, Index0, Src1, Index1); | |
| 5777 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5778 Src1, Index2, Src0, Index3); | |
| 5779 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5780 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5781 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5782 } | |
| 5783 } | |
| 5784 break; | |
| 5785 CASE_SRCS_IN(0, 1, 1, 1) : { | |
| 5786 assert(false); | |
| 5787 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
| 5788 Src1, Index1); | |
| 5789 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5790 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
| 5791 } | |
| 5792 break; | |
| 5793 CASE_SRCS_IN(1, 0, 0, 0) : { | |
| 5794 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
| 5795 Src0, Index1); | |
| 5796 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5797 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
| 5798 } | |
| 5799 break; | |
| 5800 CASE_SRCS_IN(1, 0, 0, 1) : { | |
| 5801 if (Index0 == Index3 && Index1 == Index2) { | |
| 5802 assert(false); | |
| 5803 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5804 Src1, Index0, Src0, Index1); | |
| 5805 T = lowerShuffleVector_AllFromSameSrc( | |
| 5806 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
| 5807 UNIFIED_INDEX_0); | |
| 5808 } else { | |
| 5809 assert(false); | |
| 5810 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5811 Src1, Index0, Src0, Index1); | |
| 5812 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5813 Src0, Index2, Src1, Index3); | |
| 5814 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5815 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5816 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5817 } | |
| 5818 } | |
| 5819 break; | |
| 5820 CASE_SRCS_IN(1, 0, 1, 0) : { | |
| 5821 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && | |
| 5822 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { | |
| 5823 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 5824 auto *Src0R = legalizeToReg(Src1); | |
| 5825 T = makeReg(DestTy); | |
| 5826 _movp(T, Src0R); | |
| 5827 _punpckl(T, Src1RM); | |
| 5828 } else if (Index0 == Index2 && Index1 == Index3) { | |
| 5829 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5830 Src1, Index0, Src0, Index1); | |
| 5831 T = lowerShuffleVector_AllFromSameSrc( | |
| 5832 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
| 5833 UNIFIED_INDEX_1); | |
| 5834 } else { | |
| 5835 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5836 Src1, Index0, Src0, Index1); | |
| 5837 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5838 Src1, Index2, Src0, Index3); | |
| 5839 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5840 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5841 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5842 } | |
| 5843 } | |
| 5844 break; | |
| 5845 CASE_SRCS_IN(1, 0, 1, 1) : { | |
| 5846 assert(false); | |
| 5847 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
| 5848 Src0, Index1); | |
| 5849 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5850 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
| 5851 } | |
| 5852 break; | |
| 5853 CASE_SRCS_IN(1, 1, 0, 0) : { | |
| 5854 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, | |
| 5855 Index2, Index3); | |
| 5856 } | |
| 5857 break; | |
| 5858 CASE_SRCS_IN(1, 1, 0, 1) : { | |
| 5859 assert(false); | |
| 5860 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
| 5861 Src1, Index3); | |
| 5862 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
| 5863 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5864 } | |
| 5865 break; | |
| 5866 CASE_SRCS_IN(1, 1, 1, 0) : { | |
| 5867 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
| 5868 Src0, Index3); | |
| 5869 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
| 5870 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5871 } | |
| 5872 break; | |
| 5873 CASE_SRCS_IN(1, 1, 1, 1) : { | |
| 5874 assert(false); | |
| 5875 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, | |
| 5876 Index3); | |
| 5877 } | |
| 5878 break; | |
| 5879 #undef CASE_SRCS_IN | |
| 5880 } | |
| 5881 | |
| 5882 assert(T != nullptr); | |
| 5883 assert(T->getType() == DestTy); | |
| 5884 _movp(Dest, T); | |
| 5885 return; | |
| 5886 } break; | |
| 5625 } | 5887 } |
| 5626 | 5888 |
| 5627 // Unoptimized shuffle. Perform a series of inserts and extracts. | 5889 // Unoptimized shuffle. Perform a series of inserts and extracts. |
| 5628 Context.insert<InstFakeDef>(T); | 5890 Context.insert<InstFakeDef>(T); |
| 5629 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
| 5630 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
| 5631 const SizeT NumElements = typeNumElements(DestTy); | |
| 5632 const Type ElementType = typeElementType(DestTy); | 5891 const Type ElementType = typeElementType(DestTy); |
| 5633 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 5892 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
| 5634 auto *Index = Instr->getIndex(I); | 5893 auto *Index = Instr->getIndex(I); |
| 5635 const SizeT Elem = Index->getValue(); | 5894 const SizeT Elem = Index->getValue(); |
| 5636 auto *ExtElmt = makeReg(ElementType); | 5895 auto *ExtElmt = makeReg(ElementType); |
| 5637 if (Elem < NumElements) { | 5896 if (Elem < NumElements) { |
| 5638 lowerExtractElement( | 5897 lowerExtractElement( |
| 5639 InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 5898 InstExtractElement::create(Func, ExtElmt, Src0, Index)); |
| 5640 } else { | 5899 } else { |
| 5641 lowerExtractElement(InstExtractElement::create( | 5900 lowerExtractElement(InstExtractElement::create( |
| (...skipping 1878 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 7520 emitGlobal(*Var, SectionSuffix); | 7779 emitGlobal(*Var, SectionSuffix); |
| 7521 } | 7780 } |
| 7522 } | 7781 } |
| 7523 } break; | 7782 } break; |
| 7524 } | 7783 } |
| 7525 } | 7784 } |
| 7526 } // end of namespace X86NAMESPACE | 7785 } // end of namespace X86NAMESPACE |
| 7527 } // end of namespace Ice | 7786 } // end of namespace Ice |
| 7528 | 7787 |
| 7529 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7788 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |