Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 5552 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5563 } | 5563 } |
| 5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 5564 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
| 5565 // explicitly looks for a ret instruction as a marker for where to insert the | 5565 // explicitly looks for a ret instruction as a marker for where to insert the |
| 5566 // frame removal instructions. | 5566 // frame removal instructions. |
| 5567 _ret(Reg); | 5567 _ret(Reg); |
| 5568 // Add a fake use of esp to make sure esp stays alive for the entire | 5568 // Add a fake use of esp to make sure esp stays alive for the entire |
| 5569 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 5569 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
| 5570 keepEspLiveAtExit(); | 5570 keepEspLiveAtExit(); |
| 5571 } | 5571 } |
| 5572 | 5572 |
| 5573 inline int32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
|
Jim Stichnoth
2016/04/25 21:23:45
I'm wondering whether these SizeT should be explic
John
2016/04/25 22:38:39
These stem from SizeT values -- i.e., operand inde
Jim Stichnoth
2016/04/25 23:09:03
I should have added more to my description above o
John
2016/04/26 11:14:13
Acknowledged.
| |
| 5574 SizeT Index3) { | |
| 5575 const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | | |
| 5576 ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); | |
| 5577 assert(Mask < 256); | |
| 5578 return Mask; | |
|
Jim Stichnoth
2016/04/25 21:23:45
Mask is unsigned, but the function returns a signe
John
2016/04/25 22:38:39
Mixing signed and unsigned integer types... what c
| |
| 5579 } | |
| 5580 | |
| 5581 template <typename TraitsType> | |
| 5582 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( | |
| 5583 Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { | |
| 5584 constexpr SizeT SrcBit = 1 << 2; | |
| 5585 assert((Index0 & SrcBit) == (Index1 & SrcBit)); | |
| 5586 assert((Index0 & SrcBit) == (Index2 & SrcBit)); | |
| 5587 assert((Index0 & SrcBit) == (Index3 & SrcBit)); | |
| 5588 (void)SrcBit; | |
| 5589 | |
| 5590 const Type SrcTy = Src->getType(); | |
| 5591 auto *T = makeReg(SrcTy); | |
| 5592 auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); | |
| 5593 auto *Mask = | |
| 5594 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
| 5595 _pshufd(T, SrcRM, Mask); | |
| 5596 return T; | |
| 5597 } | |
| 5598 | |
| 5599 template <typename TraitsType> | |
| 5600 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( | |
| 5601 Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, | |
| 5602 SizeT Index3) { | |
| 5603 constexpr SizeT SrcBit = 1 << 2; | |
| 5604 assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); | |
| 5605 assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); | |
| 5606 (void)SrcBit; | |
| 5607 | |
| 5608 const Type SrcTy = Src0->getType(); | |
| 5609 assert(Src1->getType() == SrcTy); | |
| 5610 auto *T = makeReg(SrcTy); | |
| 5611 auto *Src0R = legalizeToReg(Src0); | |
| 5612 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 5613 auto *Mask = | |
| 5614 Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | |
| 5615 _movp(T, Src0R); | |
| 5616 _shufps(T, Src1RM, Mask); | |
| 5617 return T; | |
| 5618 } | |
| 5619 | |
| 5620 template <typename TraitsType> | |
| 5621 Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5622 Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { | |
| 5623 return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, | |
| 5624 Index1, IGNORE_INDEX); | |
| 5625 } | |
| 5626 | |
| 5627 inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, | |
| 5628 SizeT Index3) { | |
| 5629 constexpr SizeT SrcBit = 1 << 2; | |
| 5630 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | |
| 5631 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | |
| 5632 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | |
| 5633 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | |
| 5634 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | |
| 5635 } | |
| 5636 | |
| 5573 template <typename TraitsType> | 5637 template <typename TraitsType> |
| 5574 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5638 void TargetX86Base<TraitsType>::lowerShuffleVector( |
| 5575 const InstShuffleVector *Instr) { | 5639 const InstShuffleVector *Instr) { |
| 5576 auto *Dest = Instr->getDest(); | 5640 auto *Dest = Instr->getDest(); |
| 5577 const Type DestTy = Dest->getType(); | 5641 const Type DestTy = Dest->getType(); |
| 5642 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
| 5643 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
| 5644 const SizeT NumElements = typeNumElements(DestTy); | |
| 5578 | 5645 |
| 5579 auto *T = makeReg(DestTy); | 5646 auto *T = makeReg(DestTy); |
| 5580 | 5647 |
| 5581 switch (DestTy) { | 5648 switch (DestTy) { |
| 5582 default: | 5649 default: |
| 5583 break; | 5650 break; |
| 5584 // TODO(jpp): figure out how to properly lower this without scalarization. | 5651 // TODO(jpp): figure out how to properly lower the remaining cases without |
| 5652 // scalarization. | |
| 5653 case IceType_v4i1: | |
| 5654 case IceType_v4i32: | |
| 5655 case IceType_v4f32: { | |
| 5656 static constexpr SizeT ExpectedNumElements = 4; | |
| 5657 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
| 5658 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
| 5659 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
| 5660 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
| 5661 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
| 5662 Variable *T = nullptr; | |
| 5663 switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { | |
| 5664 #define CASE_SRCS_IN(S0, S1, S2, S3) \ | |
| 5665 case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) | |
| 5666 CASE_SRCS_IN(0, 0, 0, 0) : { | |
| 5667 T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, | |
| 5668 Index3); | |
| 5669 } | |
| 5670 break; | |
| 5671 CASE_SRCS_IN(0, 0, 0, 1) : { | |
| 5672 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
| 5673 Src1, Index3); | |
| 5674 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
| 5675 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5676 } | |
| 5677 break; | |
| 5678 CASE_SRCS_IN(0, 0, 1, 0) : { | |
| 5679 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
| 5680 Src0, Index3); | |
| 5681 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | |
| 5682 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5683 } | |
| 5684 break; | |
| 5685 CASE_SRCS_IN(0, 0, 1, 1) : { | |
| 5686 T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, | |
| 5687 Index2, Index3); | |
| 5688 } | |
| 5689 break; | |
| 5690 CASE_SRCS_IN(0, 1, 0, 0) : { | |
| 5691 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
| 5692 Src1, Index1); | |
| 5693 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5694 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
| 5695 } | |
| 5696 break; | |
| 5697 CASE_SRCS_IN(0, 1, 0, 1) : { | |
| 5698 if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && | |
| 5699 (Index3 - ExpectedNumElements) == 1) { | |
| 5700 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 5701 auto *Src0R = legalizeToReg(Src0); | |
| 5702 T = makeReg(DestTy); | |
| 5703 _movp(T, Src0R); | |
| 5704 _punpckl(T, Src1RM); | |
| 5705 assert(false); | |
|
Jim Stichnoth
2016/04/25 21:23:45
??? Is something not getting tested that ought to
John
2016/04/25 22:38:39
Discussed offline.
For posterity: none of the CAS
| |
| 5706 } else if (Index0 == Index2 && Index1 == Index3) { | |
| 5707 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5708 Src0, Index0, Src1, Index1); | |
| 5709 T = lowerShuffleVector_AllFromSameSrc( | |
| 5710 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
| 5711 UNIFIED_INDEX_1); | |
| 5712 } else { | |
| 5713 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5714 Src0, Index0, Src1, Index1); | |
| 5715 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5716 Src0, Index2, Src1, Index3); | |
| 5717 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5718 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5719 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5720 } | |
| 5721 } | |
| 5722 break; | |
| 5723 CASE_SRCS_IN(0, 1, 1, 0) : { | |
| 5724 if (Index0 == Index3 && Index1 == Index2) { | |
| 5725 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5726 Src0, Index0, Src1, Index1); | |
| 5727 T = lowerShuffleVector_AllFromSameSrc( | |
| 5728 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
| 5729 UNIFIED_INDEX_0); | |
| 5730 } else { | |
| 5731 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5732 Src0, Index0, Src1, Index1); | |
| 5733 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5734 Src1, Index2, Src0, Index3); | |
| 5735 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5736 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5737 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5738 } | |
| 5739 } | |
| 5740 break; | |
| 5741 CASE_SRCS_IN(0, 1, 1, 1) : { | |
| 5742 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | |
| 5743 Src1, Index1); | |
| 5744 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5745 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
| 5746 } | |
| 5747 break; | |
| 5748 CASE_SRCS_IN(1, 0, 0, 0) : { | |
| 5749 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
| 5750 Src0, Index1); | |
| 5751 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5752 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | |
| 5753 } | |
| 5754 break; | |
| 5755 CASE_SRCS_IN(1, 0, 0, 1) : { | |
| 5756 if (Index0 == Index3 && Index1 == Index2) { | |
| 5757 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5758 Src1, Index0, Src0, Index1); | |
| 5759 T = lowerShuffleVector_AllFromSameSrc( | |
| 5760 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | |
| 5761 UNIFIED_INDEX_0); | |
| 5762 } else { | |
| 5763 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5764 Src1, Index0, Src0, Index1); | |
| 5765 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5766 Src0, Index2, Src1, Index3); | |
| 5767 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5768 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5769 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5770 } | |
| 5771 } | |
| 5772 break; | |
| 5773 CASE_SRCS_IN(1, 0, 1, 0) : { | |
| 5774 if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && | |
| 5775 (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { | |
| 5776 auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 5777 auto *Src0R = legalizeToReg(Src1); | |
| 5778 T = makeReg(DestTy); | |
| 5779 _movp(T, Src0R); | |
| 5780 _punpckl(T, Src1RM); | |
| 5781 } else if (Index0 == Index2 && Index1 == Index3) { | |
| 5782 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5783 Src1, Index0, Src0, Index1); | |
| 5784 T = lowerShuffleVector_AllFromSameSrc( | |
| 5785 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | |
| 5786 UNIFIED_INDEX_1); | |
| 5787 } else { | |
| 5788 auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5789 Src1, Index0, Src0, Index1); | |
| 5790 auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | |
| 5791 Src1, Index2, Src0, Index3); | |
| 5792 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5793 Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | |
| 5794 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5795 } | |
| 5796 } | |
| 5797 break; | |
| 5798 CASE_SRCS_IN(1, 0, 1, 1) : { | |
| 5799 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | |
| 5800 Src0, Index1); | |
| 5801 T = lowerShuffleVector_TwoFromSameSrc( | |
| 5802 Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | |
| 5803 } | |
| 5804 break; | |
| 5805 CASE_SRCS_IN(1, 1, 0, 0) : { | |
| 5806 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, | |
| 5807 Index2, Index3); | |
| 5808 } | |
| 5809 break; | |
| 5810 CASE_SRCS_IN(1, 1, 0, 1) : { | |
| 5811 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | |
| 5812 Src1, Index3); | |
| 5813 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
| 5814 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5815 } | |
| 5816 break; | |
| 5817 CASE_SRCS_IN(1, 1, 1, 0) : { | |
| 5818 auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | |
| 5819 Src0, Index3); | |
| 5820 T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | |
| 5821 UNIFIED_INDEX_0, UNIFIED_INDEX_1); | |
| 5822 } | |
| 5823 break; | |
| 5824 CASE_SRCS_IN(1, 1, 1, 1) : { | |
| 5825 T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, | |
| 5826 Index3); | |
| 5827 } | |
| 5828 break; | |
| 5829 #undef CASE_SRCS_IN | |
| 5830 } | |
| 5831 | |
| 5832 assert(T != nullptr); | |
| 5833 assert(T->getType() == DestTy); | |
| 5834 _movp(Dest, T); | |
| 5835 return; | |
| 5836 } break; | |
| 5585 } | 5837 } |
| 5586 | 5838 |
| 5587 // Unoptimized shuffle. Perform a series of inserts and extracts. | 5839 // Unoptimized shuffle. Perform a series of inserts and extracts. |
| 5588 Context.insert<InstFakeDef>(T); | 5840 Context.insert<InstFakeDef>(T); |
| 5589 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | |
| 5590 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | |
| 5591 const SizeT NumElements = typeNumElements(DestTy); | |
| 5592 const Type ElementType = typeElementType(DestTy); | 5841 const Type ElementType = typeElementType(DestTy); |
| 5593 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 5842 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
| 5594 auto *Index = Instr->getIndex(I); | 5843 auto *Index = Instr->getIndex(I); |
| 5595 const SizeT Elem = Index->getValue(); | 5844 const SizeT Elem = Index->getValue(); |
| 5596 auto *ExtElmt = makeReg(ElementType); | 5845 auto *ExtElmt = makeReg(ElementType); |
| 5597 if (Elem < NumElements) { | 5846 if (Elem < NumElements) { |
| 5598 lowerExtractElement( | 5847 lowerExtractElement( |
| 5599 InstExtractElement::create(Func, ExtElmt, Src0, Index)); | 5848 InstExtractElement::create(Func, ExtElmt, Src0, Index)); |
| 5600 } else { | 5849 } else { |
| 5601 lowerExtractElement(InstExtractElement::create( | 5850 lowerExtractElement(InstExtractElement::create( |
| (...skipping 1878 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 7480 emitGlobal(*Var, SectionSuffix); | 7729 emitGlobal(*Var, SectionSuffix); |
| 7481 } | 7730 } |
| 7482 } | 7731 } |
| 7483 } break; | 7732 } break; |
| 7484 } | 7733 } |
| 7485 } | 7734 } |
| 7486 } // end of namespace X86NAMESPACE | 7735 } // end of namespace X86NAMESPACE |
| 7487 } // end of namespace Ice | 7736 } // end of namespace Ice |
| 7488 | 7737 |
| 7489 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7738 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |