 Chromium Code Reviews
 Chromium Code Reviews Issue 1531623007:
  Add option to force filetype=asm for testing  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
    
  
    Issue 1531623007:
  Add option to force filetype=asm for testing  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master| Index: src/IceTargetLoweringX86BaseImpl.h | 
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h | 
| index 2b94df0799dbd9ce97815b48218d29b857f5733a..cb62a78fe358209e0f6447494d90c728474b9fbc 100644 | 
| --- a/src/IceTargetLoweringX86BaseImpl.h | 
| +++ b/src/IceTargetLoweringX86BaseImpl.h | 
| @@ -1135,8 +1135,8 @@ bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 
| return false; | 
| } | 
| } | 
| - // Lea optimization only works for i16 and i32 types, not i8. | 
| - if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 
| + // Lea optimization only works for i32 type, not i8 or i16. | 
| 
Jim Stichnoth
2016/01/12 14:54:02
What's the story with i64 under x86-64? Ask John, …
 
sehr
2016/01/12 19:01:19
Not supported either.  I changed the comment.
 | 
| + if (Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 
| return false; | 
| // Limit the number of lea/shl operations for a single multiply, to a | 
| // somewhat arbitrary choice of 3. | 
| @@ -5590,6 +5590,7 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { | 
| Variable *Dest = Cast->getDest(); | 
| const Type DestTy = Dest->getType(); | 
| const char *HelperName = nullptr; | 
| + Variable *CallDest = Dest; | 
| switch (CastKind) { | 
| default: | 
| return; | 
| @@ -5655,10 +5656,12 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { | 
| case IceType_i8: | 
| assert(Src0->getType() == IceType_v8i1); | 
| HelperName = H_bitcast_8xi1_i8; | 
| + CallDest = Func->makeVariable(IceType_i32); | 
| break; | 
| case IceType_i16: | 
| assert(Src0->getType() == IceType_v16i1); | 
| HelperName = H_bitcast_16xi1_i16; | 
| + CallDest = Func->makeVariable(IceType_i32); | 
| break; | 
| case IceType_v8i1: { | 
| assert(Src0->getType() == IceType_i8); | 
| @@ -5680,10 +5683,14 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { | 
| } break; | 
| } | 
| constexpr SizeT MaxSrcs = 1; | 
| - InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | 
| + InstCall *Call = makeHelperCall(HelperName, CallDest, MaxSrcs); | 
| Call->addArg(Src0); | 
| StackArgumentsSize = getCallStackArgumentsSizeBytes(Call); | 
| Context.insert(Call); | 
| + // Sometimes we need to convert from i8 or i16 type to i32 to maintain ABI | 
| 
Jim Stichnoth
2016/01/12 14:54:02
This is a little confusing because we're actually …
 
sehr
2016/01/12 19:01:19
Done.
 | 
| + // compatibility. If so, insert a conversion operator after the call. | 
| + if (CallDest != Dest) | 
| 
Jim Stichnoth
2016/01/12 14:54:02
I think it would be marginally more future-proof …
 
sehr
2016/01/12 19:01:19
Done.
 | 
| + Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest); | 
| Cast->setDeleted(); | 
| } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) { | 
| std::vector<Type> ArgTypes; | 
| @@ -5841,7 +5848,14 @@ Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty, | 
| Variable *MinusOnes = makeReg(Ty, RegNum); | 
| // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 
| Context.insert<InstFakeDef>(MinusOnes); | 
| - _pcmpeq(MinusOnes, MinusOnes); | 
| + if (Ty == IceType_f64) | 
| + // Making a vector of minus ones of type f64 is currently only used for the | 
| + // fabs intrinsic. To use the f64 type to create this mask with pcmpeqq | 
| + // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the | 
| + // same job and only requires SSE2. | 
| + _pcmpeq(MinusOnes, MinusOnes, IceType_f32); | 
| + else | 
| + _pcmpeq(MinusOnes, MinusOnes); | 
| return MinusOnes; | 
| } |