Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 35d7ea047e9c70eaa2e069af2f51217b388ff6cc..ea5c3d4e8656be14d9785d5dce7f54ae11f6525e 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -5675,6 +5675,90 @@ inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, |
| } |
| template <typename TraitsType> |
| +ConstantRelocatable * |
| +TargetX86Base<TraitsType>::lowerShuffleVector_CreatePshufbMask( |
| + int8_t Idx0, int8_t Idx1, int8_t Idx2, int8_t Idx3, int8_t Idx4, |
| + int8_t Idx5, int8_t Idx6, int8_t Idx7, int8_t Idx8, int8_t Idx9, |
| + int8_t Idx10, int8_t Idx11, int8_t Idx12, int8_t Idx13, int8_t Idx14, |
| + int8_t Idx15) { |
| + static constexpr uint8_t NumElements = 16; |
| + const char Initializer[NumElements] = { |
| + Idx0, Idx1, Idx2, Idx3, Idx4, Idx5, Idx6, Idx7, |
| + Idx8, Idx9, Idx10, Idx11, Idx12, Idx13, Idx14, Idx15, |
| + }; |
| + |
| + static constexpr bool SuppressMangling = false; |
|
Jim Stichnoth
2016/04/26 16:42:56
I think it doesn't matter whatsoever if mangling i
John
2016/04/26 16:52:27
Done.
|
| + static constexpr Type V4VectorType = IceType_v4i32; |
| + const uint32_t MaskAlignment = typeWidthInBytesOnStack(V4VectorType); |
| + auto *Mask = |
| + VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling); |
| + GlobalString MaskName = |
|
Jim Stichnoth
2016/04/26 16:42:57
This whole function makes me sad, but I guess what
John
2016/04/26 16:52:27
Why? What's wrong with it?
Jim Stichnoth
2016/04/26 17:03:16
Ideally, you would call GlobalContext::getConstant
|
| + Ctx->getGlobalString("Pshufb$" + Func->getFunctionName() + "$" + |
| + std::to_string(PshufbMaskCount++)); |
| + Mask->setIsConstant(true); |
| + Mask->addInitializer(VariableDeclaration::DataInitializer::create( |
| + Func->getGlobalPool(), Initializer, NumElements)); |
| + Mask->setName(MaskName); |
| + // Mask needs to be 16-byte aligned, or pshufb will seg fault. |
| + Mask->setAlignment(MaskAlignment); |
| + Func->addGlobal(Mask); |
| + |
| + constexpr RelocOffsetT Offset = 0; |
| + return llvm::cast<ConstantRelocatable>(Ctx->getConstantSym(Offset, MaskName)); |
| +} |
| + |
| +template <typename TraitsType> |
| +void TargetX86Base<TraitsType>::lowerShuffleVector_UsingPshufb( |
| + Variable *Dest, Operand *Src0, Operand *Src1, int8_t Idx0, int8_t Idx1, |
| + int8_t Idx2, int8_t Idx3, int8_t Idx4, int8_t Idx5, int8_t Idx6, |
| + int8_t Idx7, int8_t Idx8, int8_t Idx9, int8_t Idx10, int8_t Idx11, |
| + int8_t Idx12, int8_t Idx13, int8_t Idx14, int8_t Idx15) { |
| + const Type DestTy = Dest->getType(); |
| + static constexpr bool NotRebased = false; |
| + static constexpr Variable *NoBase = nullptr; |
| + // We use void for the memory operand instead of DestTy because using the |
| + // latter causes a validation failure: the X86 Inst layer complains that |
| + // vector mem operands could be under aligned. Thus, using void we avoid the |
| + // validation error. Note that the mask global declaration is aligned, so it |
| + // can be used as an XMM mem operand. |
| + static constexpr Type MaskType = IceType_void; |
| +#define IDX_IN_SRC(N, S) \ |
| + ((((N) & (1 << 4)) == (S << 4)) ? ((N)&0xf) : CLEAR_ALL_BITS) |
| + auto *Mask0M = X86OperandMem::create( |
| + Func, MaskType, NoBase, |
| + lowerShuffleVector_CreatePshufbMask( |
| + IDX_IN_SRC(Idx0, 0), IDX_IN_SRC(Idx1, 0), IDX_IN_SRC(Idx2, 0), |
| + IDX_IN_SRC(Idx3, 0), IDX_IN_SRC(Idx4, 0), IDX_IN_SRC(Idx5, 0), |
| + IDX_IN_SRC(Idx6, 0), IDX_IN_SRC(Idx7, 0), IDX_IN_SRC(Idx8, 0), |
| + IDX_IN_SRC(Idx9, 0), IDX_IN_SRC(Idx10, 0), IDX_IN_SRC(Idx11, 0), |
| + IDX_IN_SRC(Idx12, 0), IDX_IN_SRC(Idx13, 0), IDX_IN_SRC(Idx14, 0), |
| + IDX_IN_SRC(Idx15, 0)), |
| + NotRebased); |
| + auto *Mask1M = X86OperandMem::create( |
| + Func, MaskType, NoBase, |
| + lowerShuffleVector_CreatePshufbMask( |
| + IDX_IN_SRC(Idx0, 1), IDX_IN_SRC(Idx1, 1), IDX_IN_SRC(Idx2, 1), |
| + IDX_IN_SRC(Idx3, 1), IDX_IN_SRC(Idx4, 1), IDX_IN_SRC(Idx5, 1), |
| + IDX_IN_SRC(Idx6, 1), IDX_IN_SRC(Idx7, 1), IDX_IN_SRC(Idx8, 1), |
| + IDX_IN_SRC(Idx9, 1), IDX_IN_SRC(Idx10, 1), IDX_IN_SRC(Idx11, 1), |
| + IDX_IN_SRC(Idx12, 1), IDX_IN_SRC(Idx13, 1), IDX_IN_SRC(Idx14, 1), |
| + IDX_IN_SRC(Idx15, 1)), |
| + NotRebased); |
| +#undef IDX_IN_SRC |
| + auto *T0 = makeReg(DestTy); |
| + auto *T1 = makeReg(DestTy); |
| + auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| + _movp(T0, Src0RM); |
| + auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| + _movp(T1, Src1RM); |
| + |
| + _pshufb(T1, Mask1M); |
| + _pshufb(T0, Mask0M); |
| + _por(T1, T0); |
| + _movp(Dest, T1); |
| +} |
| + |
| +template <typename TraitsType> |
| void TargetX86Base<TraitsType>::lowerShuffleVector( |
| const InstShuffleVector *Instr) { |
| auto *Dest = Instr->getDest(); |
| @@ -5687,9 +5771,68 @@ void TargetX86Base<TraitsType>::lowerShuffleVector( |
| switch (DestTy) { |
| default: |
| - break; |
| - // TODO(jpp): figure out how to properly lower the remaining cases without |
| - // scalarization. |
| + llvm::report_fatal_error("Unexpected vector type."); |
| + case IceType_v16i1: |
| + case IceType_v16i8: { |
| + if (InstructionSet < Traits::SSE4_1) { |
| + // TODO(jpp): figure out how to lower with sse2. |
| + break; |
| + } |
| + static constexpr SizeT ExpectedNumElements = 16; |
| + assert(ExpectedNumElements == Instr->getNumIndexes()); |
| + (void)ExpectedNumElements; |
| + const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| + const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| + const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| + const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| + const SizeT Index4 = Instr->getIndex(4)->getValue(); |
| + const SizeT Index5 = Instr->getIndex(5)->getValue(); |
| + const SizeT Index6 = Instr->getIndex(6)->getValue(); |
| + const SizeT Index7 = Instr->getIndex(7)->getValue(); |
| + const SizeT Index8 = Instr->getIndex(8)->getValue(); |
| + const SizeT Index9 = Instr->getIndex(9)->getValue(); |
| + const SizeT Index10 = Instr->getIndex(10)->getValue(); |
| + const SizeT Index11 = Instr->getIndex(11)->getValue(); |
| + const SizeT Index12 = Instr->getIndex(12)->getValue(); |
| + const SizeT Index13 = Instr->getIndex(13)->getValue(); |
| + const SizeT Index14 = Instr->getIndex(14)->getValue(); |
| + const SizeT Index15 = Instr->getIndex(15)->getValue(); |
| + lowerShuffleVector_UsingPshufb(Dest, Src0, Src1, Index0, Index1, Index2, |
| + Index3, Index4, Index5, Index6, Index7, |
| + Index8, Index9, Index10, Index11, Index12, |
| + Index13, Index14, Index15); |
| + return; |
| + } |
| + case IceType_v8i1: |
| + case IceType_v8i16: { |
| + if (InstructionSet < Traits::SSE4_1) { |
| + // TODO(jpp): figure out how to lower with sse2. |
| + break; |
| + } |
| + static constexpr SizeT ExpectedNumElements = 8; |
| + assert(ExpectedNumElements == Instr->getNumIndexes()); |
| + (void)ExpectedNumElements; |
| + const SizeT Index0 = Instr->getIndex(0)->getValue(); |
| + const SizeT Index1 = Instr->getIndex(1)->getValue(); |
| + const SizeT Index2 = Instr->getIndex(2)->getValue(); |
| + const SizeT Index3 = Instr->getIndex(3)->getValue(); |
| + const SizeT Index4 = Instr->getIndex(4)->getValue(); |
| + const SizeT Index5 = Instr->getIndex(5)->getValue(); |
| + const SizeT Index6 = Instr->getIndex(6)->getValue(); |
| + const SizeT Index7 = Instr->getIndex(7)->getValue(); |
| +#define TO_BYTE_INDEX(I) ((I) << 1) |
| + lowerShuffleVector_UsingPshufb( |
| + Dest, Src0, Src1, TO_BYTE_INDEX(Index0), TO_BYTE_INDEX(Index0) + 1, |
| + TO_BYTE_INDEX(Index1), TO_BYTE_INDEX(Index1) + 1, TO_BYTE_INDEX(Index2), |
| + TO_BYTE_INDEX(Index2) + 1, TO_BYTE_INDEX(Index3), |
| + TO_BYTE_INDEX(Index3) + 1, TO_BYTE_INDEX(Index4), |
| + TO_BYTE_INDEX(Index4) + 1, TO_BYTE_INDEX(Index5), |
| + TO_BYTE_INDEX(Index5) + 1, TO_BYTE_INDEX(Index6), |
| + TO_BYTE_INDEX(Index6) + 1, TO_BYTE_INDEX(Index7), |
| + TO_BYTE_INDEX(Index7) + 1); |
| +#undef TO_BYTE_INDEX |
| + return; |
| + } |
| case IceType_v4i1: |
| case IceType_v4i32: |
| case IceType_v4f32: { |