Index: src/IceTargetLoweringX86BaseImpl.h |
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
index 5127768437978d0623cfa2e52d4ea0e0767bfee5..08dc25c446463dfaa11c08d5e9d5709f0dd3907a 100644 |
--- a/src/IceTargetLoweringX86BaseImpl.h |
+++ b/src/IceTargetLoweringX86BaseImpl.h |
@@ -1135,8 +1135,8 @@ bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
return false; |
} |
} |
- // Lea optimization only works for i16 and i32 types, not i8. |
- if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
+ // Lea optimization only works for the i32 type, not i8 or i16. |
Jim Stichnoth
2016/01/10 03:08:56
i32 type
sehr
2016/01/11 21:49:48
Done.
|
+ if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
return false; |
// Limit the number of lea/shl operations for a single multiply, to a |
// somewhat arbitrary choice of 3. |
@@ -5841,7 +5841,14 @@ Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, |
Variable *MinusOnes = makeReg(Ty, RegNum); |
// Insert a FakeDef so the live range of MinusOnes is not overestimated. |
Context.insert<InstFakeDef>(MinusOnes); |
- _pcmpeq(MinusOnes, MinusOnes); |
+ if (Ty == IceType_f64) |
+ // Making a vector of minus ones of type f64 is currently only used for the |
+ // fabs intrinsic. Creating this mask with the f64 type would use pcmpeqq, |
+ // which requires SSE4.1. Since we're just creating a mask, pcmpeqd does |
+ // the same job and only requires SSE2. |
+ _pcmpeq(MinusOnes, MinusOnes, IceType_f32); |
+ else |
+ _pcmpeq(MinusOnes, MinusOnes); |
return MinusOnes; |
} |