OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 5552 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5563 } | 5563 } |
5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
5565 // explicitly looks for a ret instruction as a marker for where to insert the | 5565 // explicitly looks for a ret instruction as a marker for where to insert the |
5566 // frame removal instructions. | 5566 // frame removal instructions. |
5567 _ret(Reg); | 5567 _ret(Reg); |
5568 // Add a fake use of esp to make sure esp stays alive for the entire | 5568 // Add a fake use of esp to make sure esp stays alive for the entire |
5569 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 5569 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
5570 keepEspLiveAtExit(); | 5570 keepEspLiveAtExit(); |
5571 } | 5571 } |
5572 | 5572 |
5573 inline int32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
Jim Stichnoth
2016/04/25 21:23:45
I'm wondering whether these SizeT should be explic
John
2016/04/25 22:38:39
These stem from SizeT values -- i.e., operand inde
Jim Stichnoth
2016/04/25 23:09:03
I should have added more to my description above o
John
2016/04/26 11:14:13
Acknowledged.
| |
5574 SizeT Index3) { | |
5575 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | | |
5576 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); | |
5577 assert(Mask < 256); | |
5578 return Mask; | |
Jim Stichnoth
2016/04/25 21:23:45
Mask is unsigned, but the function returns a signe
John
2016/04/25 22:38:39
Mixing signed and unsigned integer types... what c
| |
5579 } | |
5580 | |
5581 template <typename TraitsType> | |
5582 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( | |
5583 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { | |
5584 constexpr SizeT SrcBit = 1 << 2; | |
5585 assert((Index0 & SrcBit) == (Index1 & SrcBit)); | |
5586 assert((Index0 & SrcBit) == (Index2 & SrcBit)); | |
5587 assert((Index0 & SrcBit) == (Index3 & SrcBit)); | |
5588 (void)SrcBit; | |
5589 | |
5590 const Type SrcTy = Src->getType(); | |
5591 auto *T = makeReg(SrcTy); | |
5592 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); | |
5593 auto *Mask = | |
5594 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
5595 _pshufd(T, SrcRM, Mask); | |
5596 return T; | |
5597 } | |
5598 | |
5599 template <typename TraitsType> | |
5600 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( | |
5601 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, | |
5602 SizeT Index3) { | |
5603 constexpr SizeT SrcBit = 1 << 2; | |
5604 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); | |
5605 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); | |
5606 (void)SrcBit; | |
5607 | |
5608 const Type SrcTy = Src0->getType(); | |
5609 assert(Src1->getType() == SrcTy); | |
5610 auto *T = makeReg(SrcTy); | |
5611 auto *Src0R = legalizeToReg(Src0); | |
5612 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
5613 auto *Mask = | |
5614 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
5615 _movp(T, Src0R); | |
5616 _shufps(T, Src1RM, Mask); | |
5617 return T; | |
5618 } | |
5619 | |
5620 template <typename TraitsType> | |
5621 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( | |
5622 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { | |
5623 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, | |
5624 Index1, IGNORE_INDEX); | |
5625 } | |
5626 | |
5627 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
5628 SizeT Index3) { | |
5629 constexpr SizeT SrcBit = 1 << 2; | |
5630 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | |
5631 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | |
5632 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | |
5633 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | |
5634 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | |
5635 } | |
5636 | |
5573 template <typename TraitsType> | 5637 template <typename TraitsType> |
5574 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5638 void TargetX86Base<TraitsType>::lowerShuffleVector( |
5575 const InstShuffleVector *Instr) { | 5639 const InstShuffleVector *Instr) { |
5576 auto *Dest = Instr->getDest(); | 5640 auto *Dest = Instr->getDest(); |
5577 const Type DestTy = Dest->getType(); | 5641 const Type DestTy = Dest->getType(); |
5642 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
5643 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
5644 const SizeT NumElements = typeNumElements(DestTy); | |
5578 | 5645 |
5579 auto *T = makeReg(DestTy); | 5646 auto *T = makeReg(DestTy); |
5580 | 5647 |
5581 switch (DestTy) { | 5648 switch (DestTy) { |
5582 default: | 5649 default: |
5583 break; | 5650 break; |
5584 // TODO(jpp): figure out how to properly lower this without scalarization. | 5651 // TODO(jpp): figure out how to properly lower the remaining cases without |
5652 // scalarization. | |
5653 case IceType_v4i1: | |
5654 case IceType_v4i32: | |
5655 case IceType_v4f32: { | |
5656 static constexpr SizeT ExpectedNumElements = 4; | |
5657 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
5658 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
5659 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
5660 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
5661 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
5662 Variable *T = nullptr; | |
5663 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { | |
5664 #define CASE_SRCS_IN(S0, S1, S2, S3) \ | |
5665 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) | |
5666 CASE_SRCS_IN(0, 0, 0, 0) : { | |
5667 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, | |
5668 Index3); | |
5669 } | |
5670 break; | |
5671 CASE_SRCS_IN(0, 0, 0, 1) : { | |
5672 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
5673 Src1, Index3); | |
5674 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
5675 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5676 } | |
5677 break; | |
5678 CASE_SRCS_IN(0, 0, 1, 0) : { | |
5679 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
5680 Src0, Index3); | |
5681 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
5682 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5683 } | |
5684 break; | |
5685 CASE_SRCS_IN(0, 0, 1, 1) : { | |
5686 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, | |
5687 Index2, Index3); | |
5688 } | |
5689 break; | |
5690 CASE_SRCS_IN(0, 1, 0, 0) : { | |
5691 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
5692 Src1, Index1); | |
5693 T = lowerShuffleVector_TwoFromSameSrc( | |
5694 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
5695 } | |
5696 break; | |
5697 CASE_SRCS_IN(0, 1, 0, 1) : { | |
5698 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && | |
5699 (Index3 - ExpectedNumElements) == 1) { | |
5700 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
5701 auto *Src0R = legalizeToReg(Src0); | |
5702 T = makeReg(DestTy); | |
5703 _movp(T, Src0R); | |
5704 _punpckl(T, Src1RM); | |
5705 assert(false); | |
Jim Stichnoth
2016/04/25 21:23:45
??? Is something not getting tested that ought to
John
2016/04/25 22:38:39
Discussed offline.
For posterity: none of the CAS
| |
5706 } else if (Index0 == Index2 && Index1 == Index3) { | |
5707 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5708 Src0, Index0, Src1, Index1); | |
5709 T = lowerShuffleVector_AllFromSameSrc( | |
5710 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
5711 UNIFIED_INDEX_1); | |
5712 } else { | |
5713 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5714 Src0, Index0, Src1, Index1); | |
5715 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5716 Src0, Index2, Src1, Index3); | |
5717 T = lowerShuffleVector_TwoFromSameSrc( | |
5718 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5719 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5720 } | |
5721 } | |
5722 break; | |
5723 CASE_SRCS_IN(0, 1, 1, 0) : { | |
5724 if (Index0 == Index3 && Index1 == Index2) { | |
5725 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5726 Src0, Index0, Src1, Index1); | |
5727 T = lowerShuffleVector_AllFromSameSrc( | |
5728 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
5729 UNIFIED_INDEX_0); | |
5730 } else { | |
5731 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5732 Src0, Index0, Src1, Index1); | |
5733 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5734 Src1, Index2, Src0, Index3); | |
5735 T = lowerShuffleVector_TwoFromSameSrc( | |
5736 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5737 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5738 } | |
5739 } | |
5740 break; | |
5741 CASE_SRCS_IN(0, 1, 1, 1) : { | |
5742 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
5743 Src1, Index1); | |
5744 T = lowerShuffleVector_TwoFromSameSrc( | |
5745 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
5746 } | |
5747 break; | |
5748 CASE_SRCS_IN(1, 0, 0, 0) : { | |
5749 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
5750 Src0, Index1); | |
5751 T = lowerShuffleVector_TwoFromSameSrc( | |
5752 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
5753 } | |
5754 break; | |
5755 CASE_SRCS_IN(1, 0, 0, 1) : { | |
5756 if (Index0 == Index3 && Index1 == Index2) { | |
5757 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5758 Src1, Index0, Src0, Index1); | |
5759 T = lowerShuffleVector_AllFromSameSrc( | |
5760 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
5761 UNIFIED_INDEX_0); | |
5762 } else { | |
5763 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5764 Src1, Index0, Src0, Index1); | |
5765 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5766 Src0, Index2, Src1, Index3); | |
5767 T = lowerShuffleVector_TwoFromSameSrc( | |
5768 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5769 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5770 } | |
5771 } | |
5772 break; | |
5773 CASE_SRCS_IN(1, 0, 1, 0) : { | |
5774 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && | |
5775 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { | |
5776 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
5777 auto *Src0R = legalizeToReg(Src1); | |
5778 T = makeReg(DestTy); | |
5779 _movp(T, Src0R); | |
5780 _punpckl(T, Src1RM); | |
5781 } else if (Index0 == Index2 && Index1 == Index3) { | |
5782 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5783 Src1, Index0, Src0, Index1); | |
5784 T = lowerShuffleVector_AllFromSameSrc( | |
5785 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
5786 UNIFIED_INDEX_1); | |
5787 } else { | |
5788 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5789 Src1, Index0, Src0, Index1); | |
5790 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
5791 Src1, Index2, Src0, Index3); | |
5792 T = lowerShuffleVector_TwoFromSameSrc( | |
5793 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
5794 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5795 } | |
5796 } | |
5797 break; | |
5798 CASE_SRCS_IN(1, 0, 1, 1) : { | |
5799 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
5800 Src0, Index1); | |
5801 T = lowerShuffleVector_TwoFromSameSrc( | |
5802 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
5803 } | |
5804 break; | |
5805 CASE_SRCS_IN(1, 1, 0, 0) : { | |
5806 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, | |
5807 Index2, Index3); | |
5808 } | |
5809 break; | |
5810 CASE_SRCS_IN(1, 1, 0, 1) : { | |
5811 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
5812 Src1, Index3); | |
5813 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
5814 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5815 } | |
5816 break; | |
5817 CASE_SRCS_IN(1, 1, 1, 0) : { | |
5818 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
5819 Src0, Index3); | |
5820 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
5821 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
5822 } | |
5823 break; | |
5824 CASE_SRCS_IN(1, 1, 1, 1) : { | |
5825 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, | |
5826 Index3); | |
5827 } | |
5828 break; | |
5829 #undef CASE_SRCS_IN | |
5830 } | |
5831 | |
5832 assert(T != nullptr); | |
5833 assert(T->getType() == DestTy); | |
5834 _movp(Dest, T); | |
5835 return; | |
5836 } break; | |
5585 } | 5837 } |
5586 | 5838 |
5587 // Unoptimized shuffle. Perform a series of inserts and extracts. | 5839 // Unoptimized shuffle. Perform a series of inserts and extracts. |
5588 Context.insert<InstFakeDef>(T); | 5840 Context.insert<InstFakeDef>(T); |
5589 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
5590 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
5591 const SizeT NumElements = typeNumElements(DestTy); | |
5592 const Type ElementType = typeElementType(DestTy); | 5841 const Type ElementType = typeElementType(DestTy); |
5593 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 5842 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
5594 auto *Index = Instr->getIndex(I); | 5843 auto *Index = Instr->getIndex(I); |
5595 const SizeT Elem = Index->getValue(); | 5844 const SizeT Elem = Index->getValue(); |
5596 auto *ExtElmt = makeReg(ElementType); | 5845 auto *ExtElmt = makeReg(ElementType); |
5597 if (Elem < NumElements) { | 5846 if (Elem < NumElements) { |
5598 lowerExtractElement( | 5847 lowerExtractElement( |
5599 InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 5848 InstExtractElement::create(Func, ExtElmt, Src0, Index)); |
5600 } else { | 5849 } else { |
5601 lowerExtractElement(InstExtractElement::create( | 5850 lowerExtractElement(InstExtractElement::create( |
(...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
7480 emitGlobal(*Var, SectionSuffix); | 7729 emitGlobal(*Var, SectionSuffix); |
7481 } | 7730 } |
7482 } | 7731 } |
7483 } break; | 7732 } break; |
7484 } | 7733 } |
7485 } | 7734 } |
7486 } // end of namespace X86NAMESPACE | 7735 } // end of namespace X86NAMESPACE |
7487 } // end of namespace Ice | 7736 } // end of namespace Ice |
7488 | 7737 |
7489 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7738 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |