Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1117 matching lines...) | (...skipping 1117 matching lines...) |
| 1128 Src1 /= 3; | 1128 Src1 /= 3; |
| 1129 } else if (Src1 % 2 == 0) { | 1129 } else if (Src1 % 2 == 0) { |
| 1130 if (Count2 == 0) | 1130 if (Count2 == 0) |
| 1131 ++CountOps; | 1131 ++CountOps; |
| 1132 ++Count2; | 1132 ++Count2; |
| 1133 Src1 /= 2; | 1133 Src1 /= 2; |
| 1134 } else { | 1134 } else { |
| 1135 return false; | 1135 return false; |
| 1136 } | 1136 } |
| 1137 } | 1137 } |
| 1138 // Lea optimization only works for i16 and i32 types, not i8. | 1138 // Lea optimization only works for i32 type, not i8 or i16. |
|
> Jim Stichnoth (2016/01/12 14:54:02): What's the story with i64 under x86-64? Ask John, [... comment truncated in the collapsed view]
>
> sehr (2016/01/12 19:01:19): Not supported either. I changed the comment.
| |
| 1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| 1140 return false; | 1140 return false; |
| 1141 // Limit the number of lea/shl operations for a single multiply, to a | 1141 // Limit the number of lea/shl operations for a single multiply, to a |
| 1142 // somewhat arbitrary choice of 3. | 1142 // somewhat arbitrary choice of 3. |
| 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
| 1144 if (CountOps > MaxOpsForOptimizedMul) | 1144 if (CountOps > MaxOpsForOptimizedMul) |
| 1145 return false; | 1145 return false; |
| 1146 _mov(T, Src0); | 1146 _mov(T, Src0); |
| 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1148 for (uint32_t i = 0; i < Count9; ++i) { | 1148 for (uint32_t i = 0; i < Count9; ++i) { |
| 1149 constexpr uint16_t Shift = 3; // log2(9-1) | 1149 constexpr uint16_t Shift = 3; // log2(9-1) |
| (...skipping 4433 matching lines...) | (...skipping 4433 matching lines...) |
| 5583 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5583 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
| 5584 Context.insert(Call); | 5584 Context.insert(Call); |
| 5585 Arith->setDeleted(); | 5585 Arith->setDeleted(); |
| 5586 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 5586 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 5587 InstCast::OpKind CastKind = Cast->getCastKind(); | 5587 InstCast::OpKind CastKind = Cast->getCastKind(); |
| 5588 Operand *Src0 = Cast->getSrc(0); | 5588 Operand *Src0 = Cast->getSrc(0); |
| 5589 const Type SrcType = Src0->getType(); | 5589 const Type SrcType = Src0->getType(); |
| 5590 Variable *Dest = Cast->getDest(); | 5590 Variable *Dest = Cast->getDest(); |
| 5591 const Type DestTy = Dest->getType(); | 5591 const Type DestTy = Dest->getType(); |
| 5592 const char *HelperName = nullptr; | 5592 const char *HelperName = nullptr; |
| 5593 Variable *CallDest = Dest; | |
| 5593 switch (CastKind) { | 5594 switch (CastKind) { |
| 5594 default: | 5595 default: |
| 5595 return; | 5596 return; |
| 5596 case InstCast::Fptosi: | 5597 case InstCast::Fptosi: |
| 5597 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 5598 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 5598 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 5599 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 5599 : H_fptosi_f64_i64; | 5600 : H_fptosi_f64_i64; |
| 5600 } else { | 5601 } else { |
| 5601 return; | 5602 return; |
| 5602 } | 5603 } |
| (...skipping 45 matching lines...) | (...skipping 45 matching lines...) |
| 5648 break; | 5649 break; |
| 5649 case InstCast::Bitcast: { | 5650 case InstCast::Bitcast: { |
| 5650 if (DestTy == Src0->getType()) | 5651 if (DestTy == Src0->getType()) |
| 5651 return; | 5652 return; |
| 5652 switch (DestTy) { | 5653 switch (DestTy) { |
| 5653 default: | 5654 default: |
| 5654 return; | 5655 return; |
| 5655 case IceType_i8: | 5656 case IceType_i8: |
| 5656 assert(Src0->getType() == IceType_v8i1); | 5657 assert(Src0->getType() == IceType_v8i1); |
| 5657 HelperName = H_bitcast_8xi1_i8; | 5658 HelperName = H_bitcast_8xi1_i8; |
| 5659 CallDest = Func->makeVariable(IceType_i32); | |
| 5658 break; | 5660 break; |
| 5659 case IceType_i16: | 5661 case IceType_i16: |
| 5660 assert(Src0->getType() == IceType_v16i1); | 5662 assert(Src0->getType() == IceType_v16i1); |
| 5661 HelperName = H_bitcast_16xi1_i16; | 5663 HelperName = H_bitcast_16xi1_i16; |
| 5664 CallDest = Func->makeVariable(IceType_i32); | |
| 5662 break; | 5665 break; |
| 5663 case IceType_v8i1: { | 5666 case IceType_v8i1: { |
| 5664 assert(Src0->getType() == IceType_i8); | 5667 assert(Src0->getType() == IceType_i8); |
| 5665 HelperName = H_bitcast_i8_8xi1; | 5668 HelperName = H_bitcast_i8_8xi1; |
| 5666 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5669 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 5667 // Arguments to functions are required to be at least 32 bits wide. | 5670 // Arguments to functions are required to be at least 32 bits wide. |
| 5668 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 5671 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
| 5669 Src0 = Src0AsI32; | 5672 Src0 = Src0AsI32; |
| 5670 } break; | 5673 } break; |
| 5671 case IceType_v16i1: { | 5674 case IceType_v16i1: { |
| 5672 assert(Src0->getType() == IceType_i16); | 5675 assert(Src0->getType() == IceType_i16); |
| 5673 HelperName = H_bitcast_i16_16xi1; | 5676 HelperName = H_bitcast_i16_16xi1; |
| 5674 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 5677 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 5675 // Arguments to functions are required to be at least 32 bits wide. | 5678 // Arguments to functions are required to be at least 32 bits wide. |
| 5676 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); | 5679 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0); |
| 5677 Src0 = Src0AsI32; | 5680 Src0 = Src0AsI32; |
| 5678 } break; | 5681 } break; |
| 5679 } | 5682 } |
| 5680 } break; | 5683 } break; |
| 5681 } | 5684 } |
| 5682 constexpr SizeT MaxSrcs = 1; | 5685 constexpr SizeT MaxSrcs = 1; |
| 5683 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | 5686 InstCall *Call = makeHelperCall(HelperName, CallDest, MaxSrcs); |
| 5684 Call->addArg(Src0); | 5687 Call->addArg(Src0); |
| 5685 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 5688 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); |
| 5686 Context.insert(Call); | 5689 Context.insert(Call); |
| 5690 // Sometimes we need to convert from i8 or i16 type to i32 to maintain ABI | |
|
> Jim Stichnoth (2016/01/12 14:54:02): This is a little confusing because we're actually [... comment truncated in the collapsed view]
>
> sehr (2016/01/12 19:01:19): Done.
| |
| 5691 // compatibility. If so, insert a conversion operator after the call. | |
| 5692 if (CallDest != Dest) | |
|
> Jim Stichnoth (2016/01/12 14:54:02): I think it would be marginally more future-proof i[... comment truncated in the collapsed view]
>
> sehr (2016/01/12 19:01:19): Done.
| |
| 5693 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest); | |
| 5687 Cast->setDeleted(); | 5694 Cast->setDeleted(); |
| 5688 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { | 5695 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { |
| 5689 std::vector<Type> ArgTypes; | 5696 std::vector<Type> ArgTypes; |
| 5690 Type ReturnType = IceType_void; | 5697 Type ReturnType = IceType_void; |
| 5691 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) { | 5698 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) { |
| 5692 default: | 5699 default: |
| 5693 return; | 5700 return; |
| 5694 case Intrinsics::Ctpop: { | 5701 case Intrinsics::Ctpop: { |
| 5695 Operand *Val = Intrinsic->getArg(0); | 5702 Operand *Val = Intrinsic->getArg(0); |
| 5696 Type ValTy = Val->getType(); | 5703 Type ValTy = Val->getType(); |
| (...skipping 137 matching lines...) | (...skipping 137 matching lines...) |
| 5834 int32_t RegNum) { | 5841 int32_t RegNum) { |
| 5835 return makeZeroedRegister(Ty, RegNum); | 5842 return makeZeroedRegister(Ty, RegNum); |
| 5836 } | 5843 } |
| 5837 | 5844 |
| 5838 template <typename TraitsType> | 5845 template <typename TraitsType> |
| 5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, | 5846 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
| 5840 int32_t RegNum) { | 5847 int32_t RegNum) { |
| 5841 Variable *MinusOnes = makeReg(Ty, RegNum); | 5848 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5849 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5843 Context.insert<InstFakeDef>(MinusOnes); | 5850 Context.insert<InstFakeDef>(MinusOnes); |
| 5844 _pcmpeq(MinusOnes, MinusOnes); | 5851 if (Ty == IceType_f64) |
| 5852 // Making a vector of minus ones of type f64 is currently only used for the | |
| 5853 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq | |
| 5854 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the | |
| 5855 // same job and only requires SSE2. | |
| 5856 _pcmpeq(MinusOnes, MinusOnes, IceType_f32); | |
| 5857 else | |
| 5858 _pcmpeq(MinusOnes, MinusOnes); | |
| 5845 return MinusOnes; | 5859 return MinusOnes; |
| 5846 } | 5860 } |
| 5847 | 5861 |
| 5848 template <typename TraitsType> | 5862 template <typename TraitsType> |
| 5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5863 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| 5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5864 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5865 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5852 _psub(Dest, MinusOne); | 5866 _psub(Dest, MinusOne); |
| 5853 return Dest; | 5867 return Dest; |
| 5854 } | 5868 } |
| (...skipping 700 matching lines...) | (...skipping 700 matching lines...) |
| 6555 } | 6569 } |
| 6556 // the offset is not eligible for blinding or pooling, return the original | 6570 // the offset is not eligible for blinding or pooling, return the original |
| 6557 // mem operand | 6571 // mem operand |
| 6558 return MemOperand; | 6572 return MemOperand; |
| 6559 } | 6573 } |
| 6560 | 6574 |
| 6561 } // end of namespace X86NAMESPACE | 6575 } // end of namespace X86NAMESPACE |
| 6562 } // end of namespace Ice | 6576 } // end of namespace Ice |
| 6563 | 6577 |
| 6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6578 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |