OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1128 Src1 /= 3; | 1128 Src1 /= 3; |
1129 } else if (Src1 % 2 == 0) { | 1129 } else if (Src1 % 2 == 0) { |
1130 if (Count2 == 0) | 1130 if (Count2 == 0) |
1131 ++CountOps; | 1131 ++CountOps; |
1132 ++Count2; | 1132 ++Count2; |
1133 Src1 /= 2; | 1133 Src1 /= 2; |
1134 } else { | 1134 } else { |
1135 return false; | 1135 return false; |
1136 } | 1136 } |
1137 } | 1137 } |
1138 // Lea optimization only works for i16 and i32 types, not i8. | 1138 // Lea optimization only works for the i32 type, not i8 or i16. |
Jim Stichnoth
2016/01/10 03:08:56
i32 type
sehr
2016/01/11 21:49:48
Done.
| |
1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
1140 return false; | 1140 return false; |
1141 // Limit the number of lea/shl operations for a single multiply, to a | 1141 // Limit the number of lea/shl operations for a single multiply, to a |
1142 // somewhat arbitrary choice of 3. | 1142 // somewhat arbitrary choice of 3. |
1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1144 if (CountOps > MaxOpsForOptimizedMul) | 1144 if (CountOps > MaxOpsForOptimizedMul) |
1145 return false; | 1145 return false; |
1146 _mov(T, Src0); | 1146 _mov(T, Src0); |
1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1148 for (uint32_t i = 0; i < Count9; ++i) { | 1148 for (uint32_t i = 0; i < Count9; ++i) { |
1149 constexpr uint16_t Shift = 3; // log2(9-1) | 1149 constexpr uint16_t Shift = 3; // log2(9-1) |
(...skipping 4684 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5834 int32_t RegNum) { | 5834 int32_t RegNum) { |
5835 return makeZeroedRegister(Ty, RegNum); | 5835 return makeZeroedRegister(Ty, RegNum); |
5836 } | 5836 } |
5837 | 5837 |
5838 template <typename TraitsType> | 5838 template <typename TraitsType> |
5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, | 5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
5840 int32_t RegNum) { | 5840 int32_t RegNum) { |
5841 Variable *MinusOnes = makeReg(Ty, RegNum); | 5841 Variable *MinusOnes = makeReg(Ty, RegNum); |
5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
5843 Context.insert<InstFakeDef>(MinusOnes); | 5843 Context.insert<InstFakeDef>(MinusOnes); |
5844 _pcmpeq(MinusOnes, MinusOnes); | 5844 if (Ty == IceType_f64) |
5845 // Making a vector of minus ones of type f64 is currently only used for the | |
5846 // fabs intrinsic. Using the f64 type to create this mask with pcmpeqq | |
5847 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the | |
5848 // same job and only requires SSE2. | |
5849 _pcmpeq(MinusOnes, MinusOnes, IceType_f32); | |
5850 else | |
5851 _pcmpeq(MinusOnes, MinusOnes); | |
5845 return MinusOnes; | 5852 return MinusOnes; |
5846 } | 5853 } |
5847 | 5854 |
5848 template <typename TraitsType> | 5855 template <typename TraitsType> |
5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5856 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5857 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5858 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
5852 _psub(Dest, MinusOne); | 5859 _psub(Dest, MinusOne); |
5853 return Dest; | 5860 return Dest; |
5854 } | 5861 } |
(...skipping 700 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6555 } | 6562 } |
6556 // The offset is not eligible for blinding or pooling; return the original | 6563 // The offset is not eligible for blinding or pooling; return the original |
6557 // mem operand. | 6564 // mem operand. |
6558 return MemOperand; | 6565 return MemOperand; |
6559 } | 6566 } |
6560 | 6567 |
6561 } // end of namespace X86NAMESPACE | 6568 } // end of namespace X86NAMESPACE |
6562 } // end of namespace Ice | 6569 } // end of namespace Ice |
6563 | 6570 |
6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6571 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |