OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1117 matching lines...) | |
1128 Src1 /= 3; | 1128 Src1 /= 3; |
1129 } else if (Src1 % 2 == 0) { | 1129 } else if (Src1 % 2 == 0) { |
1130 if (Count2 == 0) | 1130 if (Count2 == 0) |
1131 ++CountOps; | 1131 ++CountOps; |
1132 ++Count2; | 1132 ++Count2; |
1133 Src1 /= 2; | 1133 Src1 /= 2; |
1134 } else { | 1134 } else { |
1135 return false; | 1135 return false; |
1136 } | 1136 } |
1137 } | 1137 } |
1138 // Lea optimization only works for i16 and i32 types, not i8. | 1138 // Lea optimization only works for i32 type, not i8 or i16. |
Jim Stichnoth (2016/01/12 14:54:02): What's the story with i64 under x86-64? Ask John, …
sehr (2016/01/12 19:01:19): Not supported either. I changed the comment.
1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
1140 return false; | 1140 return false; |
1141 // Limit the number of lea/shl operations for a single multiply, to a | 1141 // Limit the number of lea/shl operations for a single multiply, to a |
1142 // somewhat arbitrary choice of 3. | 1142 // somewhat arbitrary choice of 3. |
1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1144 if (CountOps > MaxOpsForOptimizedMul) | 1144 if (CountOps > MaxOpsForOptimizedMul) |
1145 return false; | 1145 return false; |
1146 _mov(T, Src0); | 1146 _mov(T, Src0); |
1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1148 for (uint32_t i = 0; i < Count9; ++i) { | 1148 for (uint32_t i = 0; i < Count9; ++i) { |
1149 constexpr uint16_t Shift = 3; // log2(9-1) | 1149 constexpr uint16_t Shift = 3; // log2(9-1) |
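The loop above strength-reduces a multiply by a constant: the multiplier is factored into 9s, 5s, 3s and a single power of two, each factor of 9, 5, or 3 becoming one lea and all factors of two together becoming one shl. The 9 and 5 branches fall in the elided part of the diff, so the standalone sketch below assumes they mirror the visible 3 and 2 branches; the names are illustrative rather than Subzero's API, the i32-only type restriction is omitted, and the op budget matches MaxOpsForOptimizedMul.

    #include <cstdint>
    #include <cstdio>

    // Sketch: returns true if Multiplier factors as 9^a * 5^b * 3^c * 2^d within
    // the op budget; each factor of 9, 5, or 3 costs one lea, and all factors of
    // two together cost a single shl (matching how CountOps is maintained above).
    static bool decomposeMultiplier(uint32_t Multiplier, uint32_t &Count9,
                                    uint32_t &Count5, uint32_t &Count3,
                                    uint32_t &Count2, uint32_t &CountOps) {
      Count9 = Count5 = Count3 = Count2 = CountOps = 0;
      if (Multiplier == 0)
        return false;
      while (Multiplier > 1) {
        if (Multiplier % 9 == 0) {
          ++CountOps; ++Count9; Multiplier /= 9;
        } else if (Multiplier % 5 == 0) {
          ++CountOps; ++Count5; Multiplier /= 5;
        } else if (Multiplier % 3 == 0) {
          ++CountOps; ++Count3; Multiplier /= 3;
        } else if (Multiplier % 2 == 0) {
          if (Count2 == 0)
            ++CountOps; // all powers of two fold into a single shl
          ++Count2;
          Multiplier /= 2;
        } else {
          return false; // leftover prime factor: fall back to a real imul
        }
      }
      return CountOps <= 3; // same bound as MaxOpsForOptimizedMul
    }

    int main() {
      uint32_t C9, C5, C3, C2, Ops;
      // 45 = 9 * 5: two lea instructions (x*9 via lea, then the result *5 via lea).
      if (decomposeMultiplier(45, C9, C5, C3, C2, Ops))
        std::printf("9^%u 5^%u 3^%u 2^%u, %u ops\n", (unsigned)C9, (unsigned)C5,
                    (unsigned)C3, (unsigned)C2, (unsigned)Ops);
      return 0;
    }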
(...skipping 4433 matching lines...) | |
5583 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5583 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
5584 Context.insert(Call); | 5584 Context.insert(Call); |
5585 Arith->setDeleted(); | 5585 Arith->setDeleted(); |
5586 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 5586 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
5587 InstCast::OpKind CastKind = Cast->getCastKind(); | 5587 InstCast::OpKind CastKind = Cast->getCastKind(); |
5588 Operand *Src0 = Cast->getSrc(0); | 5588 Operand *Src0 = Cast->getSrc(0); |
5589 const Type SrcType = Src0->getType(); | 5589 const Type SrcType = Src0->getType(); |
5590 Variable *Dest = Cast->getDest(); | 5590 Variable *Dest = Cast->getDest(); |
5591 const Type DestTy = Dest->getType(); | 5591 const Type DestTy = Dest->getType(); |
5592 const char *HelperName = nullptr; | 5592 const char *HelperName = nullptr; |
5593 Variable *CallDest = Dest; | |
5593 switch (CastKind) { | 5594 switch (CastKind) { |
5594 default: | 5595 default: |
5595 return; | 5596 return; |
5596 case InstCast::Fptosi: | 5597 case InstCast::Fptosi: |
5597 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 5598 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
5598 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 5599 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
5599 : H_fptosi_f64_i64; | 5600 : H_fptosi_f64_i64; |
5600 } else { | 5601 } else { |
5601 return; | 5602 return; |
5602 } | 5603 } |
(...skipping 45 matching lines...) | |
5648 break; | 5649 break; |
5649 case InstCast::Bitcast: { | 5650 case InstCast::Bitcast: { |
5650 if (DestTy == Src0->getType()) | 5651 if (DestTy == Src0->getType()) |
5651 return; | 5652 return; |
5652 switch (DestTy) { | 5653 switch (DestTy) { |
5653 default: | 5654 default: |
5654 return; | 5655 return; |
5655 case IceType_i8: | 5656 case IceType_i8: |
5656 assert(Src0->getType() == IceType_v8i1); | 5657 assert(Src0->getType() == IceType_v8i1); |
5657 HelperName = H_bitcast_8xi1_i8; | 5658 HelperName = H_bitcast_8xi1_i8; |
5659 CallDest = Func->makeVariable(IceType_i32); | |
5658 break; | 5660 break; |
5659 case IceType_i16: | 5661 case IceType_i16: |
5660 assert(Src0->getType() == IceType_v16i1); | 5662 assert(Src0->getType() == IceType_v16i1); |
5661 HelperName = H_bitcast_16xi1_i16; | 5663 HelperName = H_bitcast_16xi1_i16; |
5664 CallDest = Func->makeVariable(IceType_i32); | |
5662 break; | 5665 break; |
5663 case IceType_v8i1: { | 5666 case IceType_v8i1: { |
5664 assert(Src0->getType() == IceType_i8); | 5667 assert(Src0->getType() == IceType_i8); |
5665 HelperName = H_bitcast_i8_8xi1; | 5668 HelperName = H_bitcast_i8_8xi1; |
5666 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5669 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
5667 // Arguments to functions are required to be at least 32 bits wide. | 5670 // Arguments to functions are required to be at least 32 bits wide. |
5668 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 5671 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
5669 Src0 = Src0AsI32; | 5672 Src0 = Src0AsI32; |
5670 } break; | 5673 } break; |
5671 case IceType_v16i1: { | 5674 case IceType_v16i1: { |
5672 assert(Src0->getType() == IceType_i16); | 5675 assert(Src0->getType() == IceType_i16); |
5673 HelperName = H_bitcast_i16_16xi1; | 5676 HelperName = H_bitcast_i16_16xi1; |
5674 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5677 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
5675 // Arguments to functions are required to be at least 32 bits wide. | 5678 // Arguments to functions are required to be at least 32 bits wide. |
5676 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 5679 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
5677 Src0 = Src0AsI32; | 5680 Src0 = Src0AsI32; |
5678 } break; | 5681 } break; |
5679 } | 5682 } |
5680 } break; | 5683 } break; |
5681 } | 5684 } |
5682 constexpr SizeT MaxSrcs = 1; | 5685 constexpr SizeT MaxSrcs = 1; |
5683 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | 5686 InstCall *Call = makeHelperCall(HelperName, CallDest, MaxSrcs); |
5684 Call->addArg(Src0); | 5687 Call->addArg(Src0); |
5685 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5688 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
5686 Context.insert(Call); | 5689 Context.insert(Call); |
5690 // Sometimes we need to convert from i8 or i16 type to i32 to maintain ABI | |
Jim Stichnoth (2016/01/12 14:54:02): This is a little confusing because we're actually …
sehr (2016/01/12 19:01:19): Done.
5691 // compatibility. If so, insert a conversion operator after the call. | |
5692 if (CallDest != Dest) | |
Jim Stichnoth (2016/01/12 14:54:02): I think it would be marginally more future-proof i…
sehr (2016/01/12 19:01:19): Done.
5693 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest); | |
5687 Cast->setDeleted(); | 5694 Cast->setDeleted(); |
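The new CallDest logic keeps the helper call's return value in an i32 temporary and narrows it afterwards with the inserted Trunc, because the helpers hand back i8/i16 results widened to 32 bits for ABI reasons. A minimal sketch of the resulting shape in plain C++; helperReturningI32 is purely hypothetical and merely stands in for a widened-return helper such as the 8xi1-to-i8 one.

    #include <cstdint>

    // Hypothetical stand-in for a runtime helper whose logical result is an i8
    // but whose ABI return type is a full 32-bit register.
    static uint32_t helperReturningI32() { return 0xAB; }

    static uint8_t narrowHelperResult() {
      uint32_t CallDest = helperReturningI32();      // call writes an i32 temporary
      uint8_t Dest = static_cast<uint8_t>(CallDest); // the inserted Trunc: CallDest -> Dest
      return Dest;
    }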
5688 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { | 5695 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { |
5689 std::vector<Type> ArgTypes; | 5696 std::vector<Type> ArgTypes; |
5690 Type ReturnType = IceType_void; | 5697 Type ReturnType = IceType_void; |
5691 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) { | 5698 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) { |
5692 default: | 5699 default: |
5693 return; | 5700 return; |
5694 case Intrinsics::Ctpop: { | 5701 case Intrinsics::Ctpop: { |
5695 Operand *Val = Intrinsic->getArg(0); | 5702 Operand *Val = Intrinsic->getArg(0); |
5696 Type ValTy = Val->getType(); | 5703 Type ValTy = Val->getType(); |
(...skipping 137 matching lines...) | |
5834 int32_t RegNum) { | 5841 int32_t RegNum) { |
5835 return makeZeroedRegister(Ty, RegNum); | 5842 return makeZeroedRegister(Ty, RegNum); |
5836 } | 5843 } |
5837 | 5844 |
5838 template <typename TraitsType> | 5845 template <typename TraitsType> |
5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, | 5846 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
5840 int32_t RegNum) { | 5847 int32_t RegNum) { |
5841 Variable *MinusOnes = makeReg(Ty, RegNum); | 5848 Variable *MinusOnes = makeReg(Ty, RegNum); |
5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5849 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
5843 Context.insert<InstFakeDef>(MinusOnes); | 5850 Context.insert<InstFakeDef>(MinusOnes); |
5844 _pcmpeq(MinusOnes, MinusOnes); | 5851 if (Ty == IceType_f64) |
5852 // Making a vector of minus ones of type f64 is currently only used for the | |
5853 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq | |
5854 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the | |
5855 // same job and only requires SSE2. | |
5856 _pcmpeq(MinusOnes, MinusOnes, IceType_f32); | |
5857 else | |
5858 _pcmpeq(MinusOnes, MinusOnes); | |
5845 return MinusOnes; | 5859 return MinusOnes; |
5846 } | 5860 } |
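The pcmpeq comment boils down to bit patterns: comparing a register with itself sets every bit regardless of element width, so SSE2's pcmpeqd serves as well as SSE4.1's pcmpeqq for building a mask. Below is a hedged sketch in plain SSE2 intrinsics (not Subzero IR) of how such an all-ones register yields an f64 fabs mask via the usual shift-and-and construction; whether Subzero's fabs lowering uses exactly this sequence is not visible in this diff.

    #include <emmintrin.h>

    static inline __m128d fabsSketch(__m128d X) {
      __m128i Zero = _mm_setzero_si128();
      __m128i AllOnes = _mm_cmpeq_epi32(Zero, Zero);   // pcmpeqd: every bit set, SSE2 only
      __m128i AbsMask = _mm_srli_epi64(AllOnes, 1);    // 0x7FFFFFFFFFFFFFFF in each 64-bit lane
      return _mm_and_pd(X, _mm_castsi128_pd(AbsMask)); // clear the sign bit of each double
    }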
5847 | 5861 |
5848 template <typename TraitsType> | 5862 template <typename TraitsType> |
5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5863 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5864 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5865 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
5852 _psub(Dest, MinusOne); | 5866 _psub(Dest, MinusOne); |
5853 return Dest; | 5867 return Dest; |
5854 } | 5868 } |
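makeVectorOfOnes avoids a constant-pool load by subtracting the all-minus-ones register from zero, since 0 - (-1) = 1 in every lane. The same trick in plain SSE2 intrinsics, purely as an illustration (32-bit lanes chosen arbitrarily):

    #include <emmintrin.h>

    static inline __m128i vectorOfOnes() {
      __m128i Zeros = _mm_setzero_si128();
      __m128i MinusOnes = _mm_cmpeq_epi32(Zeros, Zeros); // all bits set == -1 per lane
      return _mm_sub_epi32(Zeros, MinusOnes);            // 0 - (-1) == 1 in each lane
    }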
(...skipping 700 matching lines...) | |
6555 } | 6569 } |
6556 // the offset is not eligible for blinding or pooling, return the original | 6570 // the offset is not eligible for blinding or pooling, return the original |
6557 // mem operand | 6571 // mem operand |
6558 return MemOperand; | 6572 return MemOperand; |
6559 } | 6573 } |
6560 | 6574 |
6561 } // end of namespace X86NAMESPACE | 6575 } // end of namespace X86NAMESPACE |
6562 } // end of namespace Ice | 6576 } // end of namespace Ice |
6563 | 6577 |
6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6578 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |