Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 5127768437978d0623cfa2e52d4ea0e0767bfee5..08dc25c446463dfaa11c08d5e9d5709f0dd3907a 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -1135,8 +1135,8 @@ bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| return false; |
| } |
| } |
| - // Lea optimization only works for i16 and i32 types, not i8. |
| - if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| + // Lea optimization only works for i32 types, not i8 or i16. |
|
Jim Stichnoth
2016/01/10 03:08:56
i32 type
sehr
2016/01/11 21:49:48
Done.
|
| + if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| return false; |
| // Limit the number of lea/shl operations for a single multiply, to a |
| // somewhat arbitrary choice of 3. |
| @@ -5841,7 +5841,14 @@ Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
| Variable *MinusOnes = makeReg(Ty, RegNum); |
| // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
| Context.insert<InstFakeDef>(MinusOnes); |
| - _pcmpeq(MinusOnes, MinusOnes); |
| + if (Ty == IceType_f64) |
| + // Making a vector of minus ones of type f64 is currently only used for the |
| + // fabs intrinsic. Creating this mask with the f64 type would use pcmpeqq, |
| + // which requires SSE 4.1. Since we're just creating a mask, pcmpeqd does |
| + // the same job and only requires SSE2. |
| + _pcmpeq(MinusOnes, MinusOnes, IceType_f32); |
| + else |
| + _pcmpeq(MinusOnes, MinusOnes); |
| return MinusOnes; |
| } |