Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 5657 matching lines...) | |
| 5668 SizeT Index3) { | 5668 SizeT Index3) { |
| 5669 constexpr SizeT SrcBit = 1 << 2; | 5669 constexpr SizeT SrcBit = 1 << 2; |
| 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); |
| 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); |
| 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); |
| 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); |
| 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; |
| 5675 } | 5675 } |
| 5676 | 5676 |
| 5677 template <typename TraitsType> | 5677 template <typename TraitsType> |
| 5678 ConstantRelocatable * | |
| 5679 TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask( | |
| 5680 int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, | |
| 5681 int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, | |
| 5682 int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, | |
| 5683 int8_t Idx15) { | |
| 5684 static constexpr uint8_t NumElements = 16; | |
| 5685 const char Initializer[NumElements] = { | |
| 5686 Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7, | |
| 5687 Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15, | |
| 5688 }; | |
| 5689 | |
| 5690 static constexpr bool SuppressMangling = false; | |
| **Jim Stichnoth** (2016/04/26 16:42:56): I think it doesn't matter whatsoever if mangling i…<br>**John** (2016/04/26 16:52:27): Done. | |
| 5691 static constexpr Type V4VectorType = IceType_v4i32; | |
| 5692 const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType); | |
| 5693 auto *Mask = | |
| 5694 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling); | |
| 5695 GlobalString MaskName = | |
| **Jim Stichnoth** (2016/04/26 16:42:57): This whole function makes me sad, but I guess what…<br>**John** (2016/04/26 16:52:27): Why? What's wrong with it?<br>**Jim Stichnoth** (2016/04/26 17:03:16): Ideally, you would call GlobalContext::getConstant… | |
| 5696 Ctx->getGlobalString("Pshufb$" + Func->getFunctionName() + "$" + | |
| 5697 std::to_string(PshufbMaskCount++)); | |
| 5698 Mask->setIsConstant(true); | |
| 5699 Mask->addInitializer(VariableDeclaration::DataInitializer::create( | |
| 5700 Func->getGlobalPool(), Initializer, NumElements)); | |
| 5701 Mask->setName(MaskName); | |
| 5702 // Mask needs to be 16-byte aligned, or pshufb will seg fault. | |
| 5703 Mask->setAlignment(MaskAlignment); | |
| 5704 Func->addGlobal(Mask); | |
| 5705 | |
| 5706 constexpr RelocOffsetT Offset = 0; | |
| 5707 return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName)); | |
| 5708 } | |
| 5709 | |
| 5710 template <typename TraitsType> | |
| 5711 void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb( | |
| 5712 Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1, | |
| 5713 int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, | |
| 5714 int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, | |
| 5715 int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) { | |
| 5716 const Type DestTy = Dest->getType(); | |
| 5717 static constexpr bool NotRebased = false; | |
| 5718 static constexpr Variable *NoBase = nullptr; | |
| 5719 // We use void for the memory operand instead of DestTy because using the | |
| 5720 // latter causes a validation failure: the X86 Inst layer complains that | |
| 5721 // vector mem operands could be under aligned. Thus, using void we avoid the | |
| 5722 // validation error. Note that the mask global declaration is aligned, so it | |
| 5723 // can be used as an XMM mem operand. | |
| 5724 static constexpr Type MaskType = IceType_void; | |
| 5725 #define IDX_IN_SRC(N, S) \ | |
| 5726 ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS) | |
| 5727 auto *Mask0M = X86OperandMem::create( | |
| 5728 Func, MaskType, NoBase, | |
| 5729 lowerShuffleVector_CreatePshufbMask( | |
| 5730 IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0), | |
| 5731 IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0), | |
| 5732 IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0), | |
| 5733 IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0), | |
| 5734 IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0), | |
| 5735 IDX_IN_SRC(Idx15, 0)), | |
| 5736 NotRebased); | |
| 5737 auto *Mask1M = X86OperandMem::create( | |
| 5738 Func, MaskType, NoBase, | |
| 5739 lowerShuffleVector_CreatePshufbMask( | |
| 5740 IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1), | |
| 5741 IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1), | |
| 5742 IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1), | |
| 5743 IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1), | |
| 5744 IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1), | |
| 5745 IDX_IN_SRC(Idx15, 1)), | |
| 5746 NotRebased); | |
| 5747 #undef IDX_IN_SRC | |
| 5748 auto *T0 = makeReg(DestTy); | |
| 5749 auto *T1 = makeReg(DestTy); | |
| 5750 auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 5751 _movp(T0, Src0RM); | |
| 5752 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 5753 _movp(T1, Src1RM); | |
| 5754 | |
| 5755 _pshufb(T1, Mask1M); | |
| 5756 _pshufb(T0, Mask0M); | |
| 5757 _por(T1, T0); | |
| 5758 _movp(Dest, T1); | |
| 5759 } | |
| 5760 | |
| 5761 template <typename TraitsType> | |
| 5678 void TargetX86Base<TraitsType>::lowerShuffleVector( | 5762 void TargetX86Base<TraitsType>::lowerShuffleVector( |
| 5679 const InstShuffleVector *Instr) { | 5763 const InstShuffleVector *Instr) { |
| 5680 auto *Dest = Instr->getDest(); | 5764 auto *Dest = Instr->getDest(); |
| 5681 const Type DestTy = Dest->getType(); | 5765 const Type DestTy = Dest->getType(); |
| 5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | 5766 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
| 5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | 5767 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
| 5684 const SizeT NumElements = typeNumElements(DestTy); | 5768 const SizeT NumElements = typeNumElements(DestTy); |
| 5685 | 5769 |
| 5686 auto *T = makeReg(DestTy); | 5770 auto *T = makeReg(DestTy); |
| 5687 | 5771 |
| 5688 switch (DestTy) { | 5772 switch (DestTy) { |
| 5689 default: | 5773 default: |
| 5690 break; | 5774 llvm::report_fatal_error("Unexpected vector type."); |
| 5691 // TODO(jpp): figure out how to properly lower the remaining cases without | 5775 case IceType_v16i1: |
| 5692 // scalarization. | 5776 case IceType_v16i8: { |
| 5777 if (InstructionSet < Traits::SSE4_1) { | |
| 5778 // TODO(jpp): figure out how to lower with sse2. | |
| 5779 break; | |
| 5780 } | |
| 5781 static constexpr SizeT ExpectedNumElements = 16; | |
| 5782 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
| 5783 (void)ExpectedNumElements; | |
| 5784 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
| 5785 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
| 5786 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
| 5787 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
| 5788 const SizeT Index4 = Instr->getIndex(4)->getValue(); | |
| 5789 const SizeT Index5 = Instr->getIndex(5)->getValue(); | |
| 5790 const SizeT Index6 = Instr->getIndex(6)->getValue(); | |
| 5791 const SizeT Index7 = Instr->getIndex(7)->getValue(); | |
| 5792 const SizeT Index8 = Instr->getIndex(8)->getValue(); | |
| 5793 const SizeT Index9 = Instr->getIndex(9)->getValue(); | |
| 5794 const SizeT Index10 = Instr->getIndex(10)->getValue(); | |
| 5795 const SizeT Index11 = Instr->getIndex(11)->getValue(); | |
| 5796 const SizeT Index12 = Instr->getIndex(12)->getValue(); | |
| 5797 const SizeT Index13 = Instr->getIndex(13)->getValue(); | |
| 5798 const SizeT Index14 = Instr->getIndex(14)->getValue(); | |
| 5799 const SizeT Index15 = Instr->getIndex(15)->getValue(); | |
| 5800 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2, | |
| 5801 Index3, Index4, Index5, Index6, Index7, | |
| 5802 Index8, Index9, Index10, Index11, Index12, | |
| 5803 Index13, Index14, Index15); | |
| 5804 return; | |
| 5805 } | |
| 5806 case IceType_v8i1: | |
| 5807 case IceType_v8i16: { | |
| 5808 if (InstructionSet < Traits::SSE4_1) { | |
| 5809 // TODO(jpp): figure out how to lower with sse2. | |
| 5810 break; | |
| 5811 } | |
| 5812 static constexpr SizeT ExpectedNumElements = 8; | |
| 5813 assert(ExpectedNumElements == Instr->getNumIndexes()); | |
| 5814 (void)ExpectedNumElements; | |
| 5815 const SizeT Index0 = Instr->getIndex(0)->getValue(); | |
| 5816 const SizeT Index1 = Instr->getIndex(1)->getValue(); | |
| 5817 const SizeT Index2 = Instr->getIndex(2)->getValue(); | |
| 5818 const SizeT Index3 = Instr->getIndex(3)->getValue(); | |
| 5819 const SizeT Index4 = Instr->getIndex(4)->getValue(); | |
| 5820 const SizeT Index5 = Instr->getIndex(5)->getValue(); | |
| 5821 const SizeT Index6 = Instr->getIndex(6)->getValue(); | |
| 5822 const SizeT Index7 = Instr->getIndex(7)->getValue(); | |
| 5823 #define TO_BYTE_INDEX(I) ((I) << 1) | |
| 5824 lowerShuffleVector_UsingPshufb( | |
| 5825 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1, | |
| 5826 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2), | |
| 5827 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3), | |
| 5828 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4), | |
| 5829 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5), | |
| 5830 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6), | |
| 5831 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7), | |
| 5832 TO_BYTE_INDEX(Index7) + 1); | |
| 5833 #undef TO_BYTE_INDEX | |
| 5834 return; | |
| 5835 } | |
| 5693 case IceType_v4i1: | 5836 case IceType_v4i1: |
| 5694 case IceType_v4i32: | 5837 case IceType_v4i32: |
| 5695 case IceType_v4f32: { | 5838 case IceType_v4f32: { |
| 5696 static constexpr SizeT ExpectedNumElements = 4; | 5839 static constexpr SizeT ExpectedNumElements = 4; |
| 5697 assert(ExpectedNumElements == Instr->getNumIndexes()); | 5840 assert(ExpectedNumElements == Instr->getNumIndexes()); |
| 5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); | 5841 const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| 5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); | 5842 const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| 5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); | 5843 const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| 5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); | 5844 const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| 5702 Variable *T = nullptr; | 5845 Variable *T = nullptr; |
| (...skipping 2087 matching lines...) | |
| 7790 emitGlobal(*Var, SectionSuffix); | 7933 emitGlobal(*Var, SectionSuffix); |
| 7791 } | 7934 } |
| 7792 } | 7935 } |
| 7793 } break; | 7936 } break; |
| 7794 } | 7937 } |
| 7795 } | 7938 } |
| 7796 } // end of namespace X86NAMESPACE | 7939 } // end of namespace X86NAMESPACE |
| 7797 } // end of namespace Ice | 7940 } // end of namespace Ice |
| 7798 | 7941 |
| 7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7942 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |