OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 5657 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5668 SizeT Index3) { | 5668 SizeT Index3) { |
5669 constexpr SizeT SrcBit = 1 << 2; | 5669 constexpr SizeT SrcBit = 1 << 2; |
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); |
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); |
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); |
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); |
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; |
5675 } | 5675 } |
5676 | 5676 |
5677 template <typename TraitsType> | 5677 template <typename TraitsType> |
| 5678 GlobalString TargetX86Base<TraitsType>::lowerShuffleVector_NewMaskName() { |
| 5679 GlobalString FuncName = Func->getFunctionName(); |
| 5680 const SizeT Id = PshufbMaskCount++; |
| 5681 if (!BuildDefs::dump() || !FuncName.hasStdString()) { |
| 5682 return GlobalString::createWithString( |
| 5683 Ctx, |
| 5684 "$PS" + std::to_string(FuncName.getID()) + "_" + std::to_string(Id)); |
| 5685 } |
| 5686 return GlobalString::createWithString( |
| 5687 Ctx, "Pshufb$" + Func->getFunctionName() + "$" + std::to_string(Id)); |
| 5688 } |
| 5689 |
| 5690 template <typename TraitsType> |
| 5691 ConstantRelocatable * |
| 5692 TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask( |
| 5693 int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, |
| 5694 int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, |
| 5695 int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, |
| 5696 int8_t Idx15) { |
| 5697 static constexpr uint8_t NumElements = 16; |
| 5698 const char Initializer[NumElements] = { |
| 5699 Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7, |
| 5700 Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15, |
| 5701 }; |
| 5702 |
| 5703 static constexpr Type V4VectorType = IceType_v4i32; |
| 5704 const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType); |
| 5705 auto *Mask = VariableDeclaration::create(Func->getGlobalPool()); |
| 5706 GlobalString MaskName = lowerShuffleVector_NewMaskName(); |
| 5707 Mask->setIsConstant(true); |
| 5708 Mask->addInitializer(VariableDeclaration::DataInitializer::create( |
| 5709 Func->getGlobalPool(), Initializer, NumElements)); |
| 5710 Mask->setName(MaskName); |
| 5711 // Mask needs to be 16-byte aligned, or pshufb will seg fault. |
| 5712 Mask->setAlignment(MaskAlignment); |
| 5713 Func->addGlobal(Mask); |
| 5714 |
| 5715 constexpr RelocOffsetT Offset = 0; |
| 5716 return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName)); |
| 5717 } |
| 5718 |
| 5719 template <typename TraitsType> |
| 5720 void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb( |
| 5721 Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1, |
| 5722 int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, |
| 5723 int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, |
| 5724 int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) { |
| 5725 const Type DestTy = Dest->getType(); |
| 5726 static constexpr bool NotRebased = false; |
| 5727 static constexpr Variable *NoBase = nullptr; |
| 5728 // We use void for the memory operand instead of DestTy because using the |
| 5729 // latter causes a validation failure: the X86 Inst layer complains that |
| 5730 // vector mem operands could be under aligned. Thus, using void we avoid the |
| 5731 // validation error. Note that the mask global declaration is aligned, so it |
| 5732 // can be used as an XMM mem operand. |
| 5733 static constexpr Type MaskType = IceType_void; |
| 5734 #define IDX_IN_SRC(N, S) \ |
| 5735 ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS) |
| 5736 auto *Mask0M = X86OperandMem::create( |
| 5737 Func, MaskType, NoBase, |
| 5738 lowerShuffleVector_CreatePshufbMask( |
| 5739 IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0), |
| 5740 IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0), |
| 5741 IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0), |
| 5742 IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0), |
| 5743 IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0), |
| 5744 IDX_IN_SRC(Idx15, 0)), |
| 5745 NotRebased); |
| 5746 auto *Mask1M = X86OperandMem::create( |
| 5747 Func, MaskType, NoBase, |
| 5748 lowerShuffleVector_CreatePshufbMask( |
| 5749 IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1), |
| 5750 IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1), |
| 5751 IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1), |
| 5752 IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1), |
| 5753 IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1), |
| 5754 IDX_IN_SRC(Idx15, 1)), |
| 5755 NotRebased); |
| 5756 #undef IDX_IN_SRC |
| 5757 auto *T0 = makeReg(DestTy); |
| 5758 auto *T1 = makeReg(DestTy); |
| 5759 auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 5760 _movp(T0, Src0RM); |
| 5761 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 5762 _movp(T1, Src1RM); |
| 5763 |
| 5764 _pshufb(T1, Mask1M); |
| 5765 _pshufb(T0, Mask0M); |
| 5766 _por(T1, T0); |
| 5767 _movp(Dest, T1); |
| 5768 } |
| 5769 |
| 5770 template <typename TraitsType> |
5678 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5771 void TargetX86Base<TraitsType>::lowerShuffleVector( |
5679 const InstShuffleVector *Instr) { | 5772 const InstShuffleVector *Instr) { |
5680 auto *Dest = Instr->getDest(); | 5773 auto *Dest = Instr->getDest(); |
5681 const Type DestTy = Dest->getType(); | 5774 const Type DestTy = Dest->getType(); |
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | 5775 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | 5776 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
5684 const SizeT NumElements = typeNumElements(DestTy); | 5777 const SizeT NumElements = typeNumElements(DestTy); |
5685 | 5778 |
5686 auto *T = makeReg(DestTy); | 5779 auto *T = makeReg(DestTy); |
5687 | 5780 |
5688 switch (DestTy) { | 5781 switch (DestTy) { |
5689 default: | 5782 default: |
5690 break; | 5783 llvm::report_fatal_error("Unexpected vector type."); |
5691 // TODO(jpp): figure out how to properly lower the remaining cases without | 5784 case IceType_v16i1: |
5692 // scalarization. | 5785 case IceType_v16i8: { |
| 5786 if (InstructionSet < Traits::SSE4_1) { |
| 5787 // TODO(jpp): figure out how to lower with sse2. |
| 5788 break; |
| 5789 } |
| 5790 static constexpr SizeT ExpectedNumElements = 16; |
| 5791 assert(ExpectedNumElements == Instr->getNumIndexes()); |
| 5792 (void)ExpectedNumElements; |
| 5793 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| 5794 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| 5795 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| 5796 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| 5797 const SizeT Index4 = Instr->getIndex(4)->getValue(); |
| 5798 const SizeT Index5 = Instr->getIndex(5)->getValue(); |
| 5799 const SizeT Index6 = Instr->getIndex(6)->getValue(); |
| 5800 const SizeT Index7 = Instr->getIndex(7)->getValue(); |
| 5801 const SizeT Index8 = Instr->getIndex(8)->getValue(); |
| 5802 const SizeT Index9 = Instr->getIndex(9)->getValue(); |
| 5803 const SizeT Index10 = Instr->getIndex(10)->getValue(); |
| 5804 const SizeT Index11 = Instr->getIndex(11)->getValue(); |
| 5805 const SizeT Index12 = Instr->getIndex(12)->getValue(); |
| 5806 const SizeT Index13 = Instr->getIndex(13)->getValue(); |
| 5807 const SizeT Index14 = Instr->getIndex(14)->getValue(); |
| 5808 const SizeT Index15 = Instr->getIndex(15)->getValue(); |
| 5809 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2, |
| 5810 Index3, Index4, Index5, Index6, Index7, |
| 5811 Index8, Index9, Index10, Index11, Index12, |
| 5812 Index13, Index14, Index15); |
| 5813 return; |
| 5814 } |
| 5815 case IceType_v8i1: |
| 5816 case IceType_v8i16: { |
| 5817 if (InstructionSet < Traits::SSE4_1) { |
| 5818 // TODO(jpp): figure out how to lower with sse2. |
| 5819 break; |
| 5820 } |
| 5821 static constexpr SizeT ExpectedNumElements = 8; |
| 5822 assert(ExpectedNumElements == Instr->getNumIndexes()); |
| 5823 (void)ExpectedNumElements; |
| 5824 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| 5825 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| 5826 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| 5827 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| 5828 const SizeT Index4 = Instr->getIndex(4)->getValue(); |
| 5829 const SizeT Index5 = Instr->getIndex(5)->getValue(); |
| 5830 const SizeT Index6 = Instr->getIndex(6)->getValue(); |
| 5831 const SizeT Index7 = Instr->getIndex(7)->getValue(); |
| 5832 #define TO_BYTE_INDEX(I) ((I) << 1) |
| 5833 lowerShuffleVector_UsingPshufb( |
| 5834 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1, |
| 5835 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2), |
| 5836 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3), |
| 5837 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4), |
| 5838 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5), |
| 5839 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6), |
| 5840 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7), |
| 5841 TO_BYTE_INDEX(Index7) + 1); |
| 5842 #undef TO_BYTE_INDEX |
| 5843 return; |
| 5844 } |
5693 case IceType_v4i1: | 5845 case IceType_v4i1: |
5694 case IceType_v4i32: | 5846 case IceType_v4i32: |
5695 case IceType_v4f32: { | 5847 case IceType_v4f32: { |
5696 static constexpr SizeT ExpectedNumElements = 4; | 5848 static constexpr SizeT ExpectedNumElements = 4; |
5697 assert(ExpectedNumElements == Instr->getNumIndexes()); | 5849 assert(ExpectedNumElements == Instr->getNumIndexes()); |
5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); | 5850 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); | 5851 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); | 5852 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); | 5853 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
5702 Variable *T = nullptr; | 5854 Variable *T = nullptr; |
(...skipping 2087 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7790 emitGlobal(*Var, SectionSuffix); | 7942 emitGlobal(*Var, SectionSuffix); |
7791 } | 7943 } |
7792 } | 7944 } |
7793 } break; | 7945 } break; |
7794 } | 7946 } |
7795 } | 7947 } |
7796 } // end of namespace X86NAMESPACE | 7948 } // end of namespace X86NAMESPACE |
7797 } // end of namespace Ice | 7949 } // end of namespace Ice |
7798 | 7950 |
7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7951 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |