| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 8e56a10bbb83adba9e884821290dda0f289b6c54..26d11b9208d84dde23ce57816bb0986be1c6a69d 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -1296,78 +1296,18 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| _movp(Dest, T4);
|
| } else {
|
| assert(Dest->getType() == IceType_v16i8);
|
| - // Sz_mul_v16i8
|
| - const IceString Helper = "Sz_mul_v16i8";
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| + scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| }
|
| } break;
|
| - case InstArithmetic::Shl: {
|
| - // Sz_shl_v4i32, Sz_shl_v8i16, Sz_shl_v16i8
|
| - const IceString Helper = "Sz_shl_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Lshr: {
|
| - // Sz_lshr_v4i32, Sz_lshr_v8i16, Sz_lshr_v16i8
|
| - const IceString Helper = "Sz_lshr_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Ashr: {
|
| - // Sz_ashr_v4i32, Sz_ashr_v8i16, Sz_ashr_v16i8
|
| - const IceString Helper = "Sz_ashr_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Udiv: {
|
| - // Sz_udiv_v4i32, Sz_udiv_v8i16, Sz_udiv_v16i8
|
| - const IceString Helper = "Sz_udiv_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Sdiv: {
|
| - // Sz_sdiv_v4i32, Sz_sdiv_v8i16, Sz_sdiv_v16i8
|
| - const IceString Helper = "Sz_sdiv_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Urem: {
|
| - // Sz_urem_v4i32, Sz_urem_v8i16, Sz_urem_v16i8
|
| - const IceString Helper = "Sz_urem_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| - case InstArithmetic::Srem: {
|
| - // Sz_srem_v4i32, Sz_srem_v8i16, Sz_srem_v16i8
|
| - const IceString Helper = "Sz_srem_" + typeIdentString(Dest->getType());
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(Helper, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| + case InstArithmetic::Shl:
|
| + case InstArithmetic::Lshr:
|
| + case InstArithmetic::Ashr:
|
| + case InstArithmetic::Udiv:
|
| + case InstArithmetic::Urem:
|
| + case InstArithmetic::Sdiv:
|
| + case InstArithmetic::Srem:
|
| + scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| + break;
|
| case InstArithmetic::Fadd: {
|
| Variable *T = makeReg(Dest->getType());
|
| _movp(T, Src0);
|
| @@ -1392,13 +1332,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| _divps(T, LEGAL_HACK(Src1));
|
| _movp(Dest, T);
|
| } break;
|
| - case InstArithmetic::Frem: {
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall("Sz_frem_v4f32", Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - lowerCall(Call);
|
| - } break;
|
| + case InstArithmetic::Frem:
|
| + scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| + break;
|
| }
|
| #undef LEGAL_HACK
|
| } else { // Dest->getType() is non-i64 scalar
|
| @@ -1490,11 +1426,18 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| break;
|
| case InstArithmetic::Sdiv:
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - T_edx = makeReg(IceType_i32, Reg_edx);
|
| - _mov(T, Src0, Reg_eax);
|
| - _cdq(T_edx, T);
|
| - _idiv(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| + if (Dest->getType() == IceType_i8) {
|
| + _mov(T, Src0, Reg_eax);
|
| + _cbwdq(T, T);
|
| + _idiv(T, Src1, T);
|
| + _mov(Dest, T);
|
| + } else {
|
| + T_edx = makeReg(IceType_i32, Reg_edx);
|
| + _mov(T, Src0, Reg_eax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T, Src1, T_edx);
|
| + _mov(Dest, T);
|
| + }
|
| break;
|
| case InstArithmetic::Urem:
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| @@ -1515,11 +1458,20 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| break;
|
| case InstArithmetic::Srem:
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - T_edx = makeReg(IceType_i32, Reg_edx);
|
| - _mov(T, Src0, Reg_eax);
|
| - _cdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| + if (Dest->getType() == IceType_i8) {
|
| + Variable *T_ah = makeReg(IceType_i8, Reg_ah);
|
| + _mov(T, Src0, Reg_eax);
|
| + _cbwdq(T, T);
|
| + Context.insert(InstFakeDef::create(Func, T_ah));
|
| + _idiv(T_ah, Src1, T);
|
| + _mov(Dest, T_ah);
|
| + } else {
|
| + T_edx = makeReg(IceType_i32, Reg_edx);
|
| + _mov(T, Src0, Reg_eax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + }
|
| break;
|
| case InstArithmetic::Fadd:
|
| _mov(T, Src0);
|
| @@ -3744,6 +3696,39 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
|
| _br(Inst->getLabelDefault());
|
| }
|
|
|
| +void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
|
| + Variable *Dest, Operand *Src0,
|
| + Operand *Src1) {
|
| + assert(isVectorType(Dest->getType()));
|
| + Type Ty = Dest->getType();
|
| + Type ElementTy = typeElementType(Ty);
|
| + SizeT NumElements = typeNumElements(Ty);
|
| + // Build the result in T one element at a time, seeded with an undef constant.
|
| + Operand *T = Ctx->getConstantUndef(Ty);
|
| + for (SizeT I = 0; I < NumElements; ++I) {
|
| + Constant *Index = Ctx->getConstantInt(IceType_i32, I);
|
| +
|
| + // Extract element I of each source vector into a fresh scalar variable.
|
| + Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());
|
| + lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
|
| + Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());
|
| + lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
|
| +
|
| + // Lower Kind on the two scalars through a nested lowerArithmetic() call.
|
| + Variable *Res = Func->makeVariable(ElementTy, Context.getNode());
|
| + lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
|
| +
|
| + // Insert the scalar result at position I, producing the next vector value.
|
| + Variable *DestT = Func->makeVariable(Ty, Context.getNode());
|
| + lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
|
| + T = DestT;
|
| + // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of
|
| + // infinite weight temporaries.
|
| + }
|
| +
|
| + lowerAssign(InstAssign::create(Func, Dest, T));
|
| +}
|
| +
|
| // The following pattern occurs often in lowered C and C++ code:
|
| //
|
| // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
|
|
|