Index: src/IceTargetLoweringX86BaseImpl.h |
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
index d96c0caaf8909d1b25e9e2100d94de1d2bf2c633..35d7ea047e9c70eaa2e069af2f51217b388ff6cc 100644 |
--- a/src/IceTargetLoweringX86BaseImpl.h |
+++ b/src/IceTargetLoweringX86BaseImpl.h |
@@ -5610,25 +5610,295 @@ void TargetX86Base<TraitsType>::lowerRet(const InstRet *Instr) { |
keepEspLiveAtExit(); |
} |
+inline uint32_t makePshufdMask(SizeT Index0, SizeT Index1, SizeT Index2, |
+ SizeT Index3) { |
+ const SizeT Mask = (Index0 & 0x3) | ((Index1 & 0x3) << 2) | |
+ ((Index2 & 0x3) << 4) | ((Index3 & 0x3) << 6); |
+ assert(Mask < 256); |
+ return Mask; |
+} |
+ |
+template <typename TraitsType> |
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_AllFromSameSrc( |
+ Variable *Src, SizeT Index0, SizeT Index1, SizeT Index2, SizeT Index3) { |
+ constexpr SizeT SrcBit = 1 << 2; |
+ assert((Index0 & SrcBit) == (Index1 & SrcBit)); |
+ assert((Index0 & SrcBit) == (Index2 & SrcBit)); |
+ assert((Index0 & SrcBit) == (Index3 & SrcBit)); |
+ (void)SrcBit; |
+ |
+ const Type SrcTy = Src->getType(); |
+ auto *T = makeReg(SrcTy); |
+ auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem); |
+ auto *Mask = |
+ Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); |
+ _pshufd(T, SrcRM, Mask); |
+ return T; |
+} |
+ |
+template <typename TraitsType> |
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_TwoFromSameSrc( |
+ Variable *Src0, SizeT Index0, SizeT Index1, Variable *Src1, SizeT Index2, |
+ SizeT Index3) { |
+ constexpr SizeT SrcBit = 1 << 2; |
+ assert((Index0 & SrcBit) == (Index1 & SrcBit) || (Index1 == IGNORE_INDEX)); |
+ assert((Index2 & SrcBit) == (Index3 & SrcBit) || (Index3 == IGNORE_INDEX)); |
+ (void)SrcBit; |
+ |
+ const Type SrcTy = Src0->getType(); |
+ assert(Src1->getType() == SrcTy); |
+ auto *T = makeReg(SrcTy); |
+ auto *Src0R = legalizeToReg(Src0); |
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
+ auto *Mask = |
+ Ctx->getConstantInt32(makePshufdMask(Index0, Index1, Index2, Index3)); |
+ _movp(T, Src0R); |
+ _shufps(T, Src1RM, Mask); |
+ return T; |
+} |
+ |
+template <typename TraitsType> |
+Variable *TargetX86Base<TraitsType>::lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Variable *Src0, SizeT Index0, Variable *Src1, SizeT Index1) { |
+ return lowerShuffleVector_TwoFromSameSrc(Src0, Index0, IGNORE_INDEX, Src1, |
+ Index1, IGNORE_INDEX); |
+} |
+ |
+inline SizeT makeSrcSwitchMask(SizeT Index0, SizeT Index1, SizeT Index2, |
+ SizeT Index3) { |
+ constexpr SizeT SrcBit = 1 << 2; |
+ const SizeT Index0Bits = ((Index0 & SrcBit) == 0) ? 0 : (1 << 0); |
+ const SizeT Index1Bits = ((Index1 & SrcBit) == 0) ? 0 : (1 << 1); |
+ const SizeT Index2Bits = ((Index2 & SrcBit) == 0) ? 0 : (1 << 2); |
+ const SizeT Index3Bits = ((Index3 & SrcBit) == 0) ? 0 : (1 << 3); |
+ return Index0Bits | Index1Bits | Index2Bits | Index3Bits; |
+} |
+ |
template <typename TraitsType> |
void TargetX86Base<TraitsType>::lowerShuffleVector( |
const InstShuffleVector *Instr) { |
auto *Dest = Instr->getDest(); |
const Type DestTy = Dest->getType(); |
+ auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
+ auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
+ const SizeT NumElements = typeNumElements(DestTy); |
auto *T = makeReg(DestTy); |
switch (DestTy) { |
default: |
break; |
- // TODO(jpp): figure out how to properly lower this without scalarization. |
+ // TODO(jpp): figure out how to properly lower the remaining cases without |
+ // scalarization. |
+ case IceType_v4i1: |
+ case IceType_v4i32: |
+ case IceType_v4f32: { |
+ static constexpr SizeT ExpectedNumElements = 4; |
+ assert(ExpectedNumElements == Instr->getNumIndexes()); |
+ const SizeT Index0 = Instr->getIndex(0)->getValue(); |
+ const SizeT Index1 = Instr->getIndex(1)->getValue(); |
+ const SizeT Index2 = Instr->getIndex(2)->getValue(); |
+ const SizeT Index3 = Instr->getIndex(3)->getValue(); |
+ Variable *T = nullptr; |
+ switch (makeSrcSwitchMask(Index0, Index1, Index2, Index3)) { |
+#define CASE_SRCS_IN(S0, S1, S2, S3) \ |
+ case (((S0) << 0) | ((S1) << 1) | ((S2) << 2) | ((S3) << 3)) |
+ CASE_SRCS_IN(0, 0, 0, 0) : { |
+ T = lowerShuffleVector_AllFromSameSrc(Src0, Index0, Index1, Index2, |
+ Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 0, 0, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, |
+ Src1, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 0, 1, 0) : { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, |
+ Src0, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Unified, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 0, 1, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ T = lowerShuffleVector_TwoFromSameSrc(Src0, Index0, Index1, Src1, |
+ Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 1, 0, 0) : { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, |
+ Src1, Index1); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 1, 0, 1) : { |
+ if (Index0 == 0 && (Index1 - ExpectedNumElements) == 0 && Index2 == 1 && |
+ (Index3 - ExpectedNumElements) == 1) { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
+ auto *Src0R = legalizeToReg(Src0); |
+ T = makeReg(DestTy); |
+ _movp(T, Src0R); |
+ _punpckl(T, Src1RM); |
+ } else if (Index0 == Index2 && Index1 == Index3) { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index0, Src1, Index1); |
+ T = lowerShuffleVector_AllFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, |
+ UNIFIED_INDEX_1); |
+ } else { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index0, Src1, Index1); |
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index2, Src1, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 1, 1, 0) : { |
+ if (Index0 == Index3 && Index1 == Index2) { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index0, Src1, Index1); |
+ T = lowerShuffleVector_AllFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, |
+ UNIFIED_INDEX_0); |
+ } else { |
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index0, Src1, Index1); |
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index2, Src0, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ } |
+ break; |
+ CASE_SRCS_IN(0, 1, 1, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index0, |
+ Src1, Index1); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 0, 0, 0) : { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, |
+ Src0, Index1); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src0, Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 0, 0, 1) : { |
+ if (Index0 == Index3 && Index1 == Index2) { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index0, Src0, Index1); |
+ T = lowerShuffleVector_AllFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_1, |
+ UNIFIED_INDEX_0); |
+ } else { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index0, Src0, Index1); |
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src0, Index2, Src1, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 0, 1, 0) : { |
+ if ((Index0 - ExpectedNumElements) == 0 && Index1 == 0 && |
+ (Index2 - ExpectedNumElements) == 1 && Index3 == 1) { |
+ auto *Src1RM = legalize(Src0, Legal_Reg | Legal_Mem); |
+ auto *Src0R = legalizeToReg(Src1); |
+ T = makeReg(DestTy); |
+ _movp(T, Src0R); |
+ _punpckl(T, Src1RM); |
+ } else if (Index0 == Index2 && Index1 == Index3) { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index0, Src0, Index1); |
+ T = lowerShuffleVector_AllFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, UNIFIED_INDEX_0, |
+ UNIFIED_INDEX_1); |
+ } else { |
+ auto *Unified0 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index0, Src0, Index1); |
+ auto *Unified1 = lowerShuffleVector_UnifyFromDifferentSrcs( |
+ Src1, Index2, Src0, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified0, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Unified1, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 0, 1, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index0, |
+ Src0, Index1); |
+ T = lowerShuffleVector_TwoFromSameSrc( |
+ Unified, UNIFIED_INDEX_0, UNIFIED_INDEX_1, Src1, Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 1, 0, 0) : { |
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Src0, |
+ Index2, Index3); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 1, 0, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src0, Index2, |
+ Src1, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 1, 1, 0) : { |
+ auto *Unified = lowerShuffleVector_UnifyFromDifferentSrcs(Src1, Index2, |
+ Src0, Index3); |
+ T = lowerShuffleVector_TwoFromSameSrc(Src1, Index0, Index1, Unified, |
+ UNIFIED_INDEX_0, UNIFIED_INDEX_1); |
+ } |
+ break; |
+ CASE_SRCS_IN(1, 1, 1, 1) : { |
+ assert(false && "Following code is untested but likely correct; test " |
+ "and remove assert."); |
+ T = lowerShuffleVector_AllFromSameSrc(Src1, Index0, Index1, Index2, |
+ Index3); |
+ } |
+ break; |
+#undef CASE_SRCS_IN |
+ } |
+ |
+ assert(T != nullptr); |
+ assert(T->getType() == DestTy); |
+ _movp(Dest, T); |
+ return; |
+ } break; |
} |
// Unoptimized shuffle. Perform a series of inserts and extracts. |
Context.insert<InstFakeDef>(T); |
- auto *Src0 = llvm::cast<Variable>(Instr->getSrc(0)); |
- auto *Src1 = llvm::cast<Variable>(Instr->getSrc(1)); |
- const SizeT NumElements = typeNumElements(DestTy); |
const Type ElementType = typeElementType(DestTy); |
for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) { |
auto *Index = Instr->getIndex(I); |