Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(400)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1917863004: Subzero. X86. Uses pshufb for shufflevector lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Deterministic table name." Created 4 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | unittest/AssemblerX8632/XmmArith.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 5657 matching lines...) Expand 10 before | Expand all | Expand 10 after
5668 SizeT Index3) { 5668 SizeT Index3) {
5669 constexpr SizeT SrcBit = 1 << 2; 5669 constexpr SizeT SrcBit = 1 << 2;
5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); 5670 const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0);
5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); 5671 const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1);
5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); 5672 const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2);
5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); 5673 const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3);
5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits; 5674 return Index0Bits | Index1Bits | Index2Bits | Index3Bits;
5675 } 5675 }
5676 5676
/// Builds the GlobalString used to name the next pshufb mask global created
/// for the function currently being lowered (Func).
///
/// Every call post-increments PshufbMaskCount and uses the previous value as
/// the numeric id embedded in the name. Two name formats exist:
///   - "$PS<function-name-ID>_<id>" when BuildDefs::dump() is false or the
///     function name has no std::string representation;
///   - "Pshufb$<function-name>$<id>" otherwise.
///
/// \return the GlobalString created in Ctx for the chosen name.
5677 template <typename TraitsType> 5677 template <typename TraitsType>
5678 GlobalString TargetX86Base<TraitsType>::lowerShuffleVector_NewMaskName() {
5679 GlobalString FuncName = Func->getFunctionName();
     // The counter advances on every call, regardless of which name format is
     // picked below.
5680 const SizeT Id = PshufbMaskCount++;
     // Without a printable function name (or in a build without dump support),
     // fall back to a name built only from numeric ids.
5681 if (!BuildDefs::dump() || !FuncName.hasStdString()) {
5682 return GlobalString::createWithString(
5683 Ctx,
5684 "$PS" + std::to_string(FuncName.getID()) + "_" + std::to_string(Id));
5685 }
     // Readable variant: embeds the function name itself.
5686 return GlobalString::createWithString(
5687 Ctx, "Pshufb$" + Func->getFunctionName() + "$" + std::to_string(Id));
5688 }
5689
/// Creates a constant global holding a 16-byte pshufb control mask and returns
/// a relocatable constant that refers to it.
///
/// \param Idx0..Idx15 the sixteen mask bytes, stored in the global in that
///        order (Idx0 first, Idx15 last).
/// \return the ConstantRelocatable obtained from Ctx->getConstantSym() for the
///         mask's name at offset 0.
///
/// The global is allocated from Func's global pool, marked constant, named via
/// lowerShuffleVector_NewMaskName(), aligned to the on-stack width of
/// IceType_v4i32, and registered with Func->addGlobal().
5690 template <typename TraitsType>
5691 ConstantRelocatable *
5692 TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask(
5693 int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4,
5694 int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9,
5695 int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14,
5696 int8_t Idx15) {
5697 static constexpr uint8_t NumElements = 16;
     // Raw bytes of the mask, in lane order.
     // NOTE(review): this brace-initializes plain char from non-constant int8_t
     // values; on a host where plain char is unsigned that would be a narrowing
     // conversion -- confirm against the supported host toolchains.
5698 const char Initializer[NumElements] = {
5699 Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7,
5700 Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15,
5701 };
5702
     // The alignment is taken from the stack width of a v4i32 vector; presumably
     // this is the 16 bytes the comment below requires -- TODO confirm.
5703 static constexpr Type V4VectorType = IceType_v4i32;
5704 const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType);
5705 auto *Mask = VariableDeclaration::create(Func->getGlobalPool());
5706 GlobalString MaskName = lowerShuffleVector_NewMaskName();
5707 Mask->setIsConstant(true);
5708 Mask->addInitializer(VariableDeclaration::DataInitializer::create(
5709 Func->getGlobalPool(), Initializer, NumElements));
5710 Mask->setName(MaskName);
5711 // Mask needs to be 16-byte aligned, or pshufb will seg fault.
5712 Mask->setAlignment(MaskAlignment);
5713 Func->addGlobal(Mask);
5714
     // Hand back a symbol reference to the start (offset 0) of the new global.
5715 constexpr RelocOffsetT Offset = 0;
5716 return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName));
5717 }
5718
/// Lowers a two-source byte shuffle into Dest using two pshufb instructions
/// and a por.
///
/// \param Dest destination variable; its type is used for both temporaries.
/// \param Src0 first source operand.
/// \param Src1 second source operand.
/// \param Idx0..Idx15 per-byte selectors. Bit 4 of each selector picks the
///        source (clear for Src0, set for Src1); the low four bits are the
///        byte position within that source.
///
/// Two mask globals are created: one keeps only the selectors that refer to
/// Src0, the other only those that refer to Src1. A selector that refers to
/// the other source is replaced by CLEAR_ALL_BITS in that mask.
/// CLEAR_ALL_BITS is defined outside this view; presumably it is a mask byte
/// that makes pshufb produce zero for that lane, so that or-ing the two
/// shuffled temporaries yields the combined result -- TODO confirm.
5719 template <typename TraitsType>
5720 void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb(
5721 Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1,
5722 int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6,
5723 int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11,
5724 int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) {
5725 const Type DestTy = Dest->getType();
5726 static constexpr bool NotRebased = false;
5727 static constexpr Variable *NoBase = nullptr;
5728 // We use void for the memory operand instead of DestTy because using the
5729 // latter causes a validation failure: the X86 Inst layer complains that
5730 // vector mem operands could be under aligned. Thus, using void we avoid the
5731 // validation error. Note that the mask global declaration is aligned, so it
5732 // can be used as an XMM mem operand.
5733 static constexpr Type MaskType = IceType_void;
     // IDX_IN_SRC(N, S): if bit 4 of selector N matches source number S (0 or 1),
     // yields the low four bits of N; otherwise yields CLEAR_ALL_BITS. S is not
     // parenthesized in the expansion; it is only ever given the literals 0 and
     // 1 below.
5734 #define IDX_IN_SRC(N, S) \
5735 ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS)
     // Memory operand for the mask that keeps the lanes taken from Src0.
5736 auto *Mask0M = X86OperandMem::create(
5737 Func, MaskType, NoBase,
5738 lowerShuffleVector_CreatePshufbMask(
5739 IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0),
5740 IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0),
5741 IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0),
5742 IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0),
5743 IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0),
5744 IDX_IN_SRC(Idx15, 0)),
5745 NotRebased);
     // Memory operand for the mask that keeps the lanes taken from Src1.
5746 auto *Mask1M = X86OperandMem::create(
5747 Func, MaskType, NoBase,
5748 lowerShuffleVector_CreatePshufbMask(
5749 IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1),
5750 IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1),
5751 IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1),
5752 IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1),
5753 IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1),
5754 IDX_IN_SRC(Idx15, 1)),
5755 NotRebased);
5756 #undef IDX_IN_SRC
     // Copy each source into its own temporary so the in-place shuffles below do
     // not modify the original operands.
5757 auto *T0 = makeReg(DestTy);
5758 auto *T1 = makeReg(DestTy);
5759 auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
5760 _movp(T0, Src0RM);
5761 auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
5762 _movp(T1, Src1RM);
5763
     // Shuffle each temporary with its own mask, merge the two with por into T1,
     // then move the merged value to Dest.
5764 _pshufb(T1, Mask1M);
5765 _pshufb(T0, Mask0M);
5766 _por(T1, T0);
5767 _movp(Dest, T1);
5768 }
5769
5770 template <typename TraitsType>
5678 void TargetX86Base<TraitsType>::lowerShuffleVector( 5771 void TargetX86Base<TraitsType>::lowerShuffleVector(
5679 const InstShuffleVector *Instr) { 5772 const InstShuffleVector *Instr) {
5680 auto *Dest = Instr->getDest(); 5773 auto *Dest = Instr->getDest();
5681 const Type DestTy = Dest->getType(); 5774 const Type DestTy = Dest->getType();
5682 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); 5775 auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0));
5683 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); 5776 auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1));
5684 const SizeT NumElements = typeNumElements(DestTy); 5777 const SizeT NumElements = typeNumElements(DestTy);
5685 5778
5686 auto *T = makeReg(DestTy); 5779 auto *T = makeReg(DestTy);
5687 5780
5688 switch (DestTy) { 5781 switch (DestTy) {
5689 default: 5782 default:
5690 break; 5783 llvm::report_fatal_error("Unexpected vector type.");
5691 // TODO(jpp): figure out how to properly lower the remaining cases without 5784 case IceType_v16i1:
5692 // scalarization. 5785 case IceType_v16i8: {
5786 if (InstructionSet < Traits::SSE4_1) {
5787 // TODO(jpp): figure out how to lower with sse2.
5788 break;
5789 }
5790 static constexpr SizeT ExpectedNumElements = 16;
5791 assert(ExpectedNumElements == Instr->getNumIndexes());
5792 (void)ExpectedNumElements;
5793 const SizeT Index0 = Instr->getIndex(0)->getValue();
5794 const SizeT Index1 = Instr->getIndex(1)->getValue();
5795 const SizeT Index2 = Instr->getIndex(2)->getValue();
5796 const SizeT Index3 = Instr->getIndex(3)->getValue();
5797 const SizeT Index4 = Instr->getIndex(4)->getValue();
5798 const SizeT Index5 = Instr->getIndex(5)->getValue();
5799 const SizeT Index6 = Instr->getIndex(6)->getValue();
5800 const SizeT Index7 = Instr->getIndex(7)->getValue();
5801 const SizeT Index8 = Instr->getIndex(8)->getValue();
5802 const SizeT Index9 = Instr->getIndex(9)->getValue();
5803 const SizeT Index10 = Instr->getIndex(10)->getValue();
5804 const SizeT Index11 = Instr->getIndex(11)->getValue();
5805 const SizeT Index12 = Instr->getIndex(12)->getValue();
5806 const SizeT Index13 = Instr->getIndex(13)->getValue();
5807 const SizeT Index14 = Instr->getIndex(14)->getValue();
5808 const SizeT Index15 = Instr->getIndex(15)->getValue();
5809 lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2,
5810 Index3, Index4, Index5, Index6, Index7,
5811 Index8, Index9, Index10, Index11, Index12,
5812 Index13, Index14, Index15);
5813 return;
5814 }
5815 case IceType_v8i1:
5816 case IceType_v8i16: {
5817 if (InstructionSet < Traits::SSE4_1) {
5818 // TODO(jpp): figure out how to lower with sse2.
5819 break;
5820 }
5821 static constexpr SizeT ExpectedNumElements = 8;
5822 assert(ExpectedNumElements == Instr->getNumIndexes());
5823 (void)ExpectedNumElements;
5824 const SizeT Index0 = Instr->getIndex(0)->getValue();
5825 const SizeT Index1 = Instr->getIndex(1)->getValue();
5826 const SizeT Index2 = Instr->getIndex(2)->getValue();
5827 const SizeT Index3 = Instr->getIndex(3)->getValue();
5828 const SizeT Index4 = Instr->getIndex(4)->getValue();
5829 const SizeT Index5 = Instr->getIndex(5)->getValue();
5830 const SizeT Index6 = Instr->getIndex(6)->getValue();
5831 const SizeT Index7 = Instr->getIndex(7)->getValue();
5832 #define TO_BYTE_INDEX(I) ((I) << 1)
5833 lowerShuffleVector_UsingPshufb(
5834 Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1,
5835 TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2),
5836 TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3),
5837 TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4),
5838 TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5),
5839 TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6),
5840 TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7),
5841 TO_BYTE_INDEX(Index7) + 1);
5842 #undef TO_BYTE_INDEX
5843 return;
5844 }
5693 case IceType_v4i1: 5845 case IceType_v4i1:
5694 case IceType_v4i32: 5846 case IceType_v4i32:
5695 case IceType_v4f32: { 5847 case IceType_v4f32: {
5696 static constexpr SizeT ExpectedNumElements = 4; 5848 static constexpr SizeT ExpectedNumElements = 4;
5697 assert(ExpectedNumElements == Instr->getNumIndexes()); 5849 assert(ExpectedNumElements == Instr->getNumIndexes());
5698 const SizeT Index0 = Instr->getIndex(0)->getValue(); 5850 const SizeT Index0 = Instr->getIndex(0)->getValue();
5699 const SizeT Index1 = Instr->getIndex(1)->getValue(); 5851 const SizeT Index1 = Instr->getIndex(1)->getValue();
5700 const SizeT Index2 = Instr->getIndex(2)->getValue(); 5852 const SizeT Index2 = Instr->getIndex(2)->getValue();
5701 const SizeT Index3 = Instr->getIndex(3)->getValue(); 5853 const SizeT Index3 = Instr->getIndex(3)->getValue();
5702 Variable *T = nullptr; 5854 Variable *T = nullptr;
(...skipping 2087 matching lines...) Expand 10 before | Expand all | Expand 10 after
7790 emitGlobal(*Var, SectionSuffix); 7942 emitGlobal(*Var, SectionSuffix);
7791 } 7943 }
7792 } 7944 }
7793 } break; 7945 } break;
7794 } 7946 }
7795 } 7947 }
7796 } // end of namespace X86NAMESPACE 7948 } // end of namespace X86NAMESPACE
7797 } // end of namespace Ice 7949 } // end of namespace Ice
7798 7950
7799 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7951 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | unittest/AssemblerX8632/XmmArith.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698