OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 5657 matching lines...) | |
5668 SizeT Index3) { | 5668 SizeT Index3) { |
5669 constexpr SizeT SrcBit = 1 << 2; | 5669 constexpr SizeT SrcBit = 1 << 2; |
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); |
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); |
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); |
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); |
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; |
5675 } | 5675 } |
5676 | 5676 |
5677 template <typename TraitsType> | 5677 template <typename TraitsType> |
5678 ConstantRelocatable * | |
5679 TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask( | |
5680 int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, | |
5681 int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, | |
5682 int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, | |
5683 int8_t Idx15) { | |
5684 static constexpr uint8_t NumElements = 16; | |
5685 const char Initializer[NumElements] = { | |
5686 Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7, | |
5687 Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15, | |
5688 }; | |
5689 | |
5690 static constexpr bool SuppressMangling = false; | |
Jim Stichnoth
2016/04/26 16:42:56
I think it doesn't matter whatsoever if mangling i...
John
2016/04/26 16:52:27
Done.
| |
5691 static constexpr Type V4VectorType = IceType_v4i32; | |
5692 const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType); | |
5693 auto *Mask = | |
5694 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling); | |
5695 GlobalString MaskName = | |
Jim Stichnoth
2016/04/26 16:42:57
This whole function makes me sad, but I guess what...
John
2016/04/26 16:52:27
Why? What's wrong with it?
Jim Stichnoth
2016/04/26 17:03:16
Ideally, you would call GlobalContext::getConstant...
| |
5696 Ctx->getGlobalString("Pshufb$" + Func->getFunctionName() + "$" + | |
5697 std::to_string(PshufbMaskCount++)); | |
5698 Mask->setIsConstant(true); | |
5699 Mask->addInitializer(VariableDeclaration::DataInitializer::create( | |
5700 Func->getGlobalPool(), Initializer, NumElements)); | |
5701 Mask->setName(MaskName); | |
5702 // Mask needs to be 16-byte aligned, or pshufb will seg fault. | |
5703 Mask->setAlignment(MaskAlignment); | |
5704 Func->addGlobal(Mask); | |
5705 | |
5706 constexpr RelocOffsetT Offset = 0; | |
5707 return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName)); | |
5708 } | |
5709 | |
5710 template <typename TraitsType> | |
5711 void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb( | |
5712 Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1, | |
5713 int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, | |
5714 int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, | |
5715 int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) { | |
5716 const Type DestTy = Dest->getType(); | |
5717 static constexpr bool NotRebased = false; | |
5718 static constexpr Variable *NoBase = nullptr; | |
5719 // We use void for the memory operand instead of DestTy because using the | |
5720 // latter causes a validation failure: the X86 Inst layer complains that | |
5721 // vector mem operands could be under aligned. Thus, using void we avoid the | |
5722 // validation error. Note that the mask global declaration is aligned, so it | |
5723 // can be used as an XMM mem operand. | |
5724 static constexpr Type MaskType = IceType_void; | |
5725 #define IDX_IN_SRC(N, S) \ | |
5726 ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS) | |
5727 auto *Mask0M = X86OperandMem::create( | |
5728 Func, MaskType, NoBase, | |
5729 lowerShuffleVector_CreatePshufbMask( | |
5730 IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0), | |
5731 IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0), | |
5732 IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0), | |
5733 IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0), | |
5734 IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0), | |
5735 IDX_IN_SRC(Idx15, 0)), | |
5736 NotRebased); | |
5737 auto *Mask1M = X86OperandMem::create( | |
5738 Func, MaskType, NoBase, | |
5739 lowerShuffleVector_CreatePshufbMask( | |
5740 IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1), | |
5741 IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1), | |
5742 IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1), | |
5743 IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1), | |
5744 IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1), | |
5745 IDX_IN_SRC(Idx15, 1)), | |
5746 NotRebased); | |
5747 #undef IDX_IN_SRC | |
5748 auto *T0 = makeReg(DestTy); | |
5749 auto *T1 = makeReg(DestTy); | |
5750 auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
5751 _movp(T0, Src0RM); | |
5752 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
5753 _movp(T1, Src1RM); | |
5754 | |
5755 _pshufb(T1, Mask1M); | |
5756 _pshufb(T0, Mask0M); | |
5757 _por(T1, T0); | |
5758 _movp(Dest, T1); | |
5759 } | |
5760 | |
5761 template <typename TraitsType> | |
5678 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5762 void TargetX86Base<TraitsType>::lowerShuffleVector( |
5679 const InstShuffleVector *Instr) { | 5763 const InstShuffleVector *Instr) { |
5680 auto *Dest = Instr->getDest(); | 5764 auto *Dest = Instr->getDest(); |
5681 const Type DestTy = Dest->getType(); | 5765 const Type DestTy = Dest->getType(); |
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | 5766 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | 5767 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
5684 const SizeT NumElements = typeNumElements(DestTy); | 5768 const SizeT NumElements = typeNumElements(DestTy); |
5685 | 5769 |
5686 auto *T = makeReg(DestTy); | 5770 auto *T = makeReg(DestTy); |
5687 | 5771 |
5688 switch (DestTy) { | 5772 switch (DestTy) { |
5689 default: | 5773 default: |
5690 break; | 5774 llvm::report_fatal_error("Unexpected vector type."); |
5691 // TODO(jpp): figure out how to properly lower the remaining cases without | 5775 case IceType_v16i1: |
5692 // scalarization. | 5776 case IceType_v16i8: { |
5777 if (InstructionSet < Traits::SSE4_1) { | |
5778 // TODO(jpp): figure out how to lower with sse2. | |
5779 break; | |
5780 } | |
5781 static constexpr SizeT ExpectedNumElements = 16; | |
5782 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
5783 (void)ExpectedNumElements; | |
5784 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
5785 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
5786 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
5787 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
5788 const SizeT Index4 = Instr->getIndex(4)->getValue(); | |
5789 const SizeT Index5 = Instr->getIndex(5)->getValue(); | |
5790 const SizeT Index6 = Instr->getIndex(6)->getValue(); | |
5791 const SizeT Index7 = Instr->getIndex(7)->getValue(); | |
5792 const SizeT Index8 = Instr->getIndex(8)->getValue(); | |
5793 const SizeT Index9 = Instr->getIndex(9)->getValue(); | |
5794 const SizeT Index10 = Instr->getIndex(10)->getValue(); | |
5795 const SizeT Index11 = Instr->getIndex(11)->getValue(); | |
5796 const SizeT Index12 = Instr->getIndex(12)->getValue(); | |
5797 const SizeT Index13 = Instr->getIndex(13)->getValue(); | |
5798 const SizeT Index14 = Instr->getIndex(14)->getValue(); | |
5799 const SizeT Index15 = Instr->getIndex(15)->getValue(); | |
5800 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2, | |
5801 Index3, Index4, Index5, Index6, Index7, | |
5802 Index8, Index9, Index10, Index11, Index12, | |
5803 Index13, Index14, Index15); | |
5804 return; | |
5805 } | |
5806 case IceType_v8i1: | |
5807 case IceType_v8i16: { | |
5808 if (InstructionSet < Traits::SSE4_1) { | |
5809 // TODO(jpp): figure out how to lower with sse2. | |
5810 break; | |
5811 } | |
5812 static constexpr SizeT ExpectedNumElements = 8; | |
5813 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
5814 (void)ExpectedNumElements; | |
5815 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
5816 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
5817 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
5818 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
5819 const SizeT Index4 = Instr->getIndex(4)->getValue(); | |
5820 const SizeT Index5 = Instr->getIndex(5)->getValue(); | |
5821 const SizeT Index6 = Instr->getIndex(6)->getValue(); | |
5822 const SizeT Index7 = Instr->getIndex(7)->getValue(); | |
5823 #define TO_BYTE_INDEX(I) ((I) << 1) | |
5824 lowerShuffleVector_UsingPshufb( | |
5825 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1, | |
5826 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2), | |
5827 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3), | |
5828 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4), | |
5829 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5), | |
5830 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6), | |
5831 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7), | |
5832 TO_BYTE_INDEX(Index7) + 1); | |
5833 #undef TO_BYTE_INDEX | |
5834 return; | |
5835 } | |
5693 case IceType_v4i1: | 5836 case IceType_v4i1: |
5694 case IceType_v4i32: | 5837 case IceType_v4i32: |
5695 case IceType_v4f32: { | 5838 case IceType_v4f32: { |
5696 static constexpr SizeT ExpectedNumElements = 4; | 5839 static constexpr SizeT ExpectedNumElements = 4; |
5697 assert(ExpectedNumElements == Instr->getNumIndexes()); | 5840 assert(ExpectedNumElements == Instr->getNumIndexes()); |
5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); | 5841 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); | 5842 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); | 5843 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); | 5844 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
5702 Variable *T = nullptr; | 5845 Variable *T = nullptr; |
(...skipping 2087 matching lines...) | |
7790 emitGlobal(*Var, SectionSuffix); | 7933 emitGlobal(*Var, SectionSuffix); |
7791 } | 7934 } |
7792 } | 7935 } |
7793 } break; | 7936 } break; |
7794 } | 7937 } |
7795 } | 7938 } |
7796 } // end of namespace X86NAMESPACE | 7939 } // end of namespace X86NAMESPACE |
7797 } // end of namespace Ice | 7940 } // end of namespace Ice |
7798 | 7941 |
7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7942 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |