Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1117 matching lines...) | (...skipping 1117 matching lines...) |
| 1128 Src1 /= 3; | 1128 Src1 /= 3; |
| 1129 } else if (Src1 % 2 == 0) { | 1129 } else if (Src1 % 2 == 0) { |
| 1130 if (Count2 == 0) | 1130 if (Count2 == 0) |
| 1131 ++CountOps; | 1131 ++CountOps; |
| 1132 ++Count2; | 1132 ++Count2; |
| 1133 Src1 /= 2; | 1133 Src1 /= 2; |
| 1134 } else { | 1134 } else { |
| 1135 return false; | 1135 return false; |
| 1136 } | 1136 } |
| 1137 } | 1137 } |
| 1138 // Lea optimization only works for i16 and i32 types, not i8. | 1138 // Lea optimization only works for i32 types, not i8 or i16. |
|
Jim Stichnoth
2016/01/10 03:08:56
i32 type
sehr
2016/01/11 21:49:48
Done.
| |
| 1139 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1139 if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| 1140 return false; | 1140 return false; |
| 1141 // Limit the number of lea/shl operations for a single multiply, to a | 1141 // Limit the number of lea/shl operations for a single multiply, to a |
| 1142 // somewhat arbitrary choice of 3. | 1142 // somewhat arbitrary choice of 3. |
| 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; | 1143 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
| 1144 if (CountOps > MaxOpsForOptimizedMul) | 1144 if (CountOps > MaxOpsForOptimizedMul) |
| 1145 return false; | 1145 return false; |
| 1146 _mov(T, Src0); | 1146 _mov(T, Src0); |
| 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1147 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1148 for (uint32_t i = 0; i < Count9; ++i) { | 1148 for (uint32_t i = 0; i < Count9; ++i) { |
| 1149 constexpr uint16_t Shift = 3; // log2(9-1) | 1149 constexpr uint16_t Shift = 3; // log2(9-1) |
| (...skipping 4684 matching lines...) | (...skipping 4684 matching lines...) |
| 5834 int32_t RegNum) { | 5834 int32_t RegNum) { |
| 5835 return makeZeroedRegister(Ty, RegNum); | 5835 return makeZeroedRegister(Ty, RegNum); |
| 5836 } | 5836 } |
| 5837 | 5837 |
| 5838 template <typename TraitsType> | 5838 template <typename TraitsType> |
| 5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, | 5839 Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
| 5840 int32_t RegNum) { | 5840 int32_t RegNum) { |
| 5841 Variable *MinusOnes = makeReg(Ty, RegNum); | 5841 Variable *MinusOnes = makeReg(Ty, RegNum); |
| 5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 5842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| 5843 Context.insert<InstFakeDef>(MinusOnes); | 5843 Context.insert<InstFakeDef>(MinusOnes); |
| 5844 _pcmpeq(MinusOnes, MinusOnes); | 5844 if (Ty == IceType_f64) |
| 5845 // Making a vector of minus ones of type f64 is currently only used for the | |
| 5846 // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq | |
| 5847 // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the | |
| 5848 // same job and only requires SSE2. | |
| 5849 _pcmpeq(MinusOnes, MinusOnes, IceType_f32); | |
| 5850 else | |
| 5851 _pcmpeq(MinusOnes, MinusOnes); | |
| 5845 return MinusOnes; | 5852 return MinusOnes; |
| 5846 } | 5853 } |
| 5847 | 5854 |
| 5848 template <typename TraitsType> | 5855 template <typename TraitsType> |
| 5849 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { | 5856 Variable *TargetX86Base<TraitsType>::makeVectorOfOnes(Type Ty, int32_t RegNum) { |
| 5850 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | 5857 Variable *Dest = makeVectorOfZeros(Ty, RegNum); |
| 5851 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 5858 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 5852 _psub(Dest, MinusOne); | 5859 _psub(Dest, MinusOne); |
| 5853 return Dest; | 5860 return Dest; |
| 5854 } | 5861 } |
| (...skipping 700 matching lines...) | (...skipping 700 matching lines...) |
| 6555 } | 6562 } |
| 6556 // the offset is not eligible for blinding or pooling, return the original | 6563 // the offset is not eligible for blinding or pooling, return the original |
| 6557 // mem operand | 6564 // mem operand |
| 6558 return MemOperand; | 6565 return MemOperand; |
| 6559 } | 6566 } |
| 6560 | 6567 |
| 6561 } // end of namespace X86NAMESPACE | 6568 } // end of namespace X86NAMESPACE |
| 6562 } // end of namespace Ice | 6569 } // end of namespace Ice |
| 6563 | 6570 |
| 6564 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6571 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |