 Chromium Code Reviews
 Chromium Code Reviews Issue 1909013002:
  Subzero. X86. Lowers shufflevector using xmm instructions.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
    
  
    Issue 1909013002:
  Subzero. X86. Lowers shufflevector using xmm instructions.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
| Index: src/IceTargetLoweringX86BaseImpl.h | 
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h | 
| index ebc5a2e2fe683dc2262f44433d7bbafbe7075b0a..55021d8551d3cb00f42529bb2bb9b5c929c3e079 100644 | 
| --- a/src/IceTargetLoweringX86BaseImpl.h | 
| +++ b/src/IceTargetLoweringX86BaseImpl.h | 
| @@ -5570,25 +5570,274 @@ void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) { | 
| keepEspLiveAtExit(); | 
| } | 
| +inline int32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, | 
| 
Jim Stichnoth
2016/04/25 21:23:45
I'm wondering whether these SizeT should be explic
 
John
2016/04/25 22:38:39
These stem from SizeT values -- i.e., operand inde
 
Jim Stichnoth
2016/04/25 23:09:03
I should have added more to my description above o
 
John
2016/04/26 11:14:13
Acknowledged.
 | 
| + SizeT Index3) { | 
| + const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | | 
| + ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); | 
| + assert(Mask < 256); | 
| + return Mask; | 
| 
Jim Stichnoth
2016/04/25 21:23:45
Mask is unsigned, but the function returns a signe
 
John
2016/04/25 22:38:39
Mixing signed and unsigned integer types... what c
 | 
| +} | 
| + | 
| +template <typename TraitsType> | 
| +Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( | 
| + Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { | 
| + constexpr SizeT SrcBit = 1 << 2; | 
| + assert((Index0 & SrcBit) == (Index1 & SrcBit)); | 
| + assert((Index0 & SrcBit) == (Index2 & SrcBit)); | 
| + assert((Index0 & SrcBit) == (Index3 & SrcBit)); | 
| + (void)SrcBit; | 
| + | 
| + const Type SrcTy = Src->getType(); | 
| + auto *T = makeReg(SrcTy); | 
| + auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); | 
| + auto *Mask = | 
| + Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | 
| + _pshufd(T, SrcRM, Mask); | 
| + return T; | 
| +} | 
| + | 
| +template <typename TraitsType> | 
| +Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( | 
| + Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, | 
| + SizeT Index3) { | 
| + constexpr SizeT SrcBit = 1 << 2; | 
| + assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); | 
| + assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); | 
| + (void)SrcBit; | 
| + | 
| + const Type SrcTy = Src0->getType(); | 
| + assert(Src1->getType() == SrcTy); | 
| + auto *T = makeReg(SrcTy); | 
| + auto *Src0R = legalizeToReg(Src0); | 
| + auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 
| + auto *Mask = | 
| + Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); | 
| + _movp(T, Src0R); | 
| + _shufps(T, Src1RM, Mask); | 
| + return T; | 
| +} | 
| + | 
| +template <typename TraitsType> | 
| +Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { | 
| + return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, | 
| + Index1, IGNORE_INDEX); | 
| +} | 
| + | 
| +inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, | 
| + SizeT Index3) { | 
| + constexpr SizeT SrcBit = 1 << 2; | 
| + const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); | 
| + const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); | 
| + const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); | 
| + const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); | 
| + return Index0Bits | Index1Bits | Index2Bits | Index3Bits; | 
| +} | 
| + | 
| template <typename TraitsType> | 
| void TargetX86Base<TraitsType>::lowerShuffleVector( | 
| const InstShuffleVector *Instr) { | 
| auto *Dest = Instr->getDest(); | 
| const Type DestTy = Dest->getType(); | 
| + auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | 
| + auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | 
| + const SizeT NumElements = typeNumElements(DestTy); | 
| auto *T = makeReg(DestTy); | 
| switch (DestTy) { | 
| default: | 
| break; | 
| - // TODO(jpp): figure out how to properly lower this without scalarization. | 
| + // TODO(jpp): figure out how to properly lower the remaining cases without | 
| + // scalarization. | 
| + case IceType_v4i1: | 
| + case IceType_v4i32: | 
| + case IceType_v4f32: { | 
| + static constexpr SizeT ExpectedNumElements = 4; | 
| + assert(ExpectedNumElements == Instr->getNumIndexes()); | 
| + const SizeT Index0 = Instr->getIndex(0)->getValue(); | 
| + const SizeT Index1 = Instr->getIndex(1)->getValue(); | 
| + const SizeT Index2 = Instr->getIndex(2)->getValue(); | 
| + const SizeT Index3 = Instr->getIndex(3)->getValue(); | 
| + Variable *T = nullptr; | 
| + switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { | 
| +#define CASE_SRCS_IN(S0, S1, S2, S3) \ | 
| + case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) | 
| + CASE_SRCS_IN(0, 0, 0, 0) : { | 
| + T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, | 
| + Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 0, 0, 1) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | 
| + Src1, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 0, 1, 0) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | 
| + Src0, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 0, 1, 1) : { | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, | 
| + Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 1, 0, 0) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | 
| + Src1, Index1); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 1, 0, 1) : { | 
| + if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && | 
| + (Index3 - ExpectedNumElements) == 1) { | 
| + auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 
| + auto *Src0R = legalizeToReg(Src0); | 
| + T = makeReg(DestTy); | 
| + _movp(T, Src0R); | 
| + _punpckl(T, Src1RM); | 
| + assert(false); | 
| 
Jim Stichnoth
2016/04/25 21:23:45
???  Is something not getting tested that ought to
 
John
2016/04/25 22:38:39
Discussed offline.
For posterity: none of the CAS
 | 
| + } else if (Index0 == Index2 && Index1 == Index3) { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index0, Src1, Index1); | 
| + T = lowerShuffleVector_AllFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | 
| + UNIFIED_INDEX_1); | 
| + } else { | 
| + auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index0, Src1, Index1); | 
| + auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index2, Src1, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 1, 1, 0) : { | 
| + if (Index0 == Index3 && Index1 == Index2) { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index0, Src1, Index1); | 
| + T = lowerShuffleVector_AllFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | 
| + UNIFIED_INDEX_0); | 
| + } else { | 
| + auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index0, Src1, Index1); | 
| + auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index2, Src0, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(0, 1, 1, 1) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, | 
| + Src1, Index1); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 0, 0, 0) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | 
| + Src0, Index1); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 0, 0, 1) : { | 
| + if (Index0 == Index3 && Index1 == Index2) { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index0, Src0, Index1); | 
| + T = lowerShuffleVector_AllFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, | 
| + UNIFIED_INDEX_0); | 
| + } else { | 
| + auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index0, Src0, Index1); | 
| + auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src0, Index2, Src1, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 0, 1, 0) : { | 
| + if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && | 
| + (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { | 
| + auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); | 
| + auto *Src0R = legalizeToReg(Src1); | 
| + T = makeReg(DestTy); | 
| + _movp(T, Src0R); | 
| + _punpckl(T, Src1RM); | 
| + } else if (Index0 == Index2 && Index1 == Index3) { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index0, Src0, Index1); | 
| + T = lowerShuffleVector_AllFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, | 
| + UNIFIED_INDEX_1); | 
| + } else { | 
| + auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index0, Src0, Index1); | 
| + auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( | 
| + Src1, Index2, Src0, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 0, 1, 1) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, | 
| + Src0, Index1); | 
| + T = lowerShuffleVector_TwoFromSameSrc( | 
| + Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 1, 0, 0) : { | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, | 
| + Index2, Index3); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 1, 0, 1) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, | 
| + Src1, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 1, 1, 0) : { | 
| + auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, | 
| + Src0, Index3); | 
| + T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, | 
| + UNIFIED_INDEX_0, UNIFIED_INDEX_1); | 
| + } | 
| + break; | 
| + CASE_SRCS_IN(1, 1, 1, 1) : { | 
| + T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, | 
| + Index3); | 
| + } | 
| + break; | 
| +#undef CASE_SRCS_IN | 
| + } | 
| + | 
| + assert(T != nullptr); | 
| + assert(T->getType() == DestTy); | 
| + _movp(Dest, T); | 
| + return; | 
| + } break; | 
| } | 
| // Unoptimized shuffle. Perform a series of inserts and extracts. | 
| Context.insert<InstFakeDef>(T); | 
| - auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); | 
| - auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); | 
| - const SizeT NumElements = typeNumElements(DestTy); | 
| const Type ElementType = typeElementType(DestTy); | 
| for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { | 
| auto *Index = Instr->getIndex(I); |