src/IceTargetLoweringX86BaseImpl.h - Issue 1531623007: Add option to force filetype=asm for testing

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1531623007: Add option to force filetype=asm for testing (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Missed one --sandbox in the wrong place. Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX86BaseImpl.h

diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h

index 2b94df0799dbd9ce97815b48218d29b857f5733a..0d4e6bb343405438dcebff08c1fa30af0aa78739 100644

--- a/src/IceTargetLoweringX86BaseImpl.h

+++ b/src/IceTargetLoweringX86BaseImpl.h

@@ -1135,8 +1135,8 @@ bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,

return false;

}

- // Lea optimization only works for i16 and i32 types, not i8.

- if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))

+ // Lea optimization only works for i32 type, not i1/i8/i16/i64.

+ if (Ty != IceType_i32 && (Count3 || Count5 || Count9))

return false;

// Limit the number of lea/shl operations for a single multiply, to a

// somewhat arbitrary choice of 3.

@@ -5590,6 +5590,7 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {

Variable *Dest = Cast->getDest();

const Type DestTy = Dest->getType();

const char *HelperName = nullptr;

+ Variable *CallDest = Dest;

switch (CastKind) {

default:

return;

@@ -5655,10 +5656,12 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {

case IceType_i8:

assert(Src0->getType() == IceType_v8i1);

HelperName = H_bitcast_8xi1_i8;

+ CallDest = Func->makeVariable(IceType_i32);

break;

case IceType_i16:

assert(Src0->getType() == IceType_v16i1);

HelperName = H_bitcast_16xi1_i16;

+ CallDest = Func->makeVariable(IceType_i32);

break;

case IceType_v8i1: {

assert(Src0->getType() == IceType_i8);

@@ -5680,10 +5683,14 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {

} break;

}

constexpr SizeT MaxSrcs = 1;

- InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);

+ InstCall *Call = makeHelperCall(HelperName, CallDest, MaxSrcs);

Call->addArg(Src0);

StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);

Context.insert(Call);

+ // The PNaCl ABI disallows i8/i16 return types, so truncate the helper call

+ // result to the appropriate type as necessary.

+ if (CallDest->getType() != Dest->getType())

+ Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);

Cast->setDeleted();

} else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {

std::vector<Type> ArgTypes;

@@ -5841,7 +5848,14 @@ Variable *TargetX86Base<TraitsType>::makeVectorOfMinusOnes(Type Ty,

Variable *MinusOnes = makeReg(Ty, RegNum);

// Insert a FakeDef so the live range of MinusOnes is not overestimated.

Context.insert<InstFakeDef>(MinusOnes);

- _pcmpeq(MinusOnes, MinusOnes);

+ if (Ty == IceType_f64)

+ // Making a vector of minus ones of type f64 is currently only used for the

+ // fabs intrinsic. Using the f64 type to create this mask with pcmpeqq

+ // requires SSE 4.1. Since we're just creating a mask, pcmpeqd does the

+ // same job and only requires SSE2.

+ _pcmpeq(MinusOnes, MinusOnes, IceType_f32);

+ else

+ _pcmpeq(MinusOnes, MinusOnes);

return MinusOnes;

}

« pydir/run-pnacl-sz.py ('K') | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/assembler/x86/jump_encodings.ll » ('j') | no next file with comments »