| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 506b315079909210d3b1f7c9dc1fbee21e0e53ea..f6cd26f64f5299550de9cc42d20ee07c8f68e400 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -1287,6 +1287,102 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
|
| _mov(Dest, esp);
|
| }
|
|
|
| +// Strength-reduce scalar integer multiplication by a constant (for
|
| +// i32 or narrower) for certain constants. The lea instruction can be
|
| +// used to multiply by 3, 5, or 9, and the lsh instruction can be used
|
| +// to multiply by powers of 2. These can be combined such that
|
| +// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
|
| +// combined with left-shifting by 2.
|
| +bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
|
| + int32_t Src1) {
|
| + // Disable this optimization for Om1 and O0, just to keep things
|
| + // simple there.
|
| + if (Ctx->getFlags().getOptLevel() < Opt_1)
|
| + return false;
|
| + Type Ty = Dest->getType();
|
| + Variable *T = nullptr;
|
| + if (Src1 == -1) {
|
| + _mov(T, Src0);
|
| + _neg(T);
|
| + _mov(Dest, T);
|
| + return true;
|
| + }
|
| + if (Src1 == 0) {
|
| + _mov(Dest, Ctx->getConstantZero(Ty));
|
| + return true;
|
| + }
|
| + if (Src1 == 1) {
|
| + _mov(T, Src0);
|
| + _mov(Dest, T);
|
| + return true;
|
| + }
|
| + // Don't bother with the edge case where Src1 == MININT.
|
| + if (Src1 == -Src1)
|
| + return false;
|
| + const bool Src1IsNegative = Src1 < 0;
|
| + if (Src1IsNegative)
|
| + Src1 = -Src1;
|
| + uint32_t Count9 = 0;
|
| + uint32_t Count5 = 0;
|
| + uint32_t Count3 = 0;
|
| + uint32_t Count2 = 0;
|
| + uint32_t CountOps = 0;
|
| + while (Src1 > 1) {
|
| + if (Src1 % 9 == 0) {
|
| + ++CountOps;
|
| + ++Count9;
|
| + Src1 /= 9;
|
| + } else if (Src1 % 5 == 0) {
|
| + ++CountOps;
|
| + ++Count5;
|
| + Src1 /= 5;
|
| + } else if (Src1 % 3 == 0) {
|
| + ++CountOps;
|
| + ++Count3;
|
| + Src1 /= 3;
|
| + } else if (Src1 % 2 == 0) {
|
| + if (Count2 == 0)
|
| + ++CountOps;
|
| + ++Count2;
|
| + Src1 /= 2;
|
| + } else {
|
| + return false;
|
| + }
|
| + }
|
| + // Lea optimization only works for i16 and i32 types, not i8.
|
| + if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
|
| + return false;
|
| + // Limit the number of lea/shl operations for a single multiply, to
|
| + // a somewhat arbitrary choice of 3.
|
| + const uint32_t MaxOpsForOptimizedMul = 3;
|
| + if (CountOps > MaxOpsForOptimizedMul)
|
| + return false;
|
| + _mov(T, Src0);
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| + for (uint32_t i = 0; i < Count9; ++i) {
|
| + const uint16_t Shift = 3; // log2(9-1)
|
| + _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| + _set_dest_nonkillable();
|
| + }
|
| + for (uint32_t i = 0; i < Count5; ++i) {
|
| + const uint16_t Shift = 2; // log2(5-1)
|
| + _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| + _set_dest_nonkillable();
|
| + }
|
| + for (uint32_t i = 0; i < Count3; ++i) {
|
| + const uint16_t Shift = 1; // log2(3-1)
|
| + _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| + _set_dest_nonkillable();
|
| + }
|
| + if (Count2) {
|
| + _shl(T, Ctx->getConstantInt(Ty, Count2));
|
| + }
|
| + if (Src1IsNegative)
|
| + _neg(T);
|
| + _mov(Dest, T);
|
| + return true;
|
| +}
|
| +
|
| void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| Variable *Dest = Inst->getDest();
|
| Operand *Src0 = legalize(Inst->getSrc(0));
|
| @@ -1294,6 +1390,8 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| if (Inst->isCommutative()) {
|
| if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
|
| std::swap(Src0, Src1);
|
| + if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
|
| + std::swap(Src0, Src1);
|
| }
|
| if (Dest->getType() == IceType_i64) {
|
| Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| @@ -1521,7 +1619,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| llvm_unreachable("FP instruction with i64 type");
|
| break;
|
| }
|
| - } else if (isVectorType(Dest->getType())) {
|
| + return;
|
| + }
|
| + if (isVectorType(Dest->getType())) {
|
| // TODO: Trap on integer divide and integer modulo by zero.
|
| // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
|
| if (llvm::isa<OperandX8632Mem>(Src1))
|
| @@ -1650,175 +1750,248 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| break;
|
| }
|
| - } else { // Dest->getType() is non-i64 scalar
|
| - Variable *T_edx = nullptr;
|
| - Variable *T = nullptr;
|
| - switch (Inst->getOp()) {
|
| - case InstArithmetic::_num:
|
| - llvm_unreachable("Unknown arithmetic operator");
|
| - break;
|
| - case InstArithmetic::Add:
|
| - _mov(T, Src0);
|
| - _add(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::And:
|
| - _mov(T, Src0);
|
| - _and(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Or:
|
| - _mov(T, Src0);
|
| - _or(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Xor:
|
| + return;
|
| + }
|
| + Variable *T_edx = nullptr;
|
| + Variable *T = nullptr;
|
| + switch (Inst->getOp()) {
|
| + case InstArithmetic::_num:
|
| + llvm_unreachable("Unknown arithmetic operator");
|
| + break;
|
| + case InstArithmetic::Add:
|
| + _mov(T, Src0);
|
| + _add(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::And:
|
| + _mov(T, Src0);
|
| + _and(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Or:
|
| + _mov(T, Src0);
|
| + _or(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Xor:
|
| + _mov(T, Src0);
|
| + _xor(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Sub:
|
| + _mov(T, Src0);
|
| + _sub(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Mul:
|
| + if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| + if (optimizeScalarMul(Dest, Src0, C->getValue()))
|
| + return;
|
| + }
|
| + // The 8-bit version of imul only allows the form "imul r/m8"
|
| + // where T must be in eax.
|
| + if (isByteSizedArithType(Dest->getType())) {
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| + } else {
|
| _mov(T, Src0);
|
| - _xor(T, Src1);
|
| + }
|
| + _imul(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Shl:
|
| + _mov(T, Src0);
|
| + if (!llvm::isa<Constant>(Src1))
|
| + Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| + _shl(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Lshr:
|
| + _mov(T, Src0);
|
| + if (!llvm::isa<Constant>(Src1))
|
| + Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| + _shr(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Ashr:
|
| + _mov(T, Src0);
|
| + if (!llvm::isa<Constant>(Src1))
|
| + Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| + _sar(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Udiv:
|
| + // div and idiv are the few arithmetic operators that do not allow
|
| + // immediates as the operand.
|
| + Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| + if (isByteSizedArithType(Dest->getType())) {
|
| + Variable *T_ah = nullptr;
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| + _div(T, Src1, T_ah);
|
| _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Sub:
|
| - _mov(T, Src0);
|
| - _sub(T, Src1);
|
| + } else {
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| + _div(T, Src1, T_edx);
|
| _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Mul:
|
| - // TODO: Optimize for llvm::isa<Constant>(Src1)
|
| - // TODO: Strength-reduce multiplications by a constant,
|
| - // particularly -1 and powers of 2. Advanced: use lea to
|
| - // multiply by 3, 5, 9.
|
| - //
|
| - // The 8-bit version of imul only allows the form "imul r/m8"
|
| - // where T must be in eax.
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - } else {
|
| - _mov(T, Src0);
|
| + }
|
| + break;
|
| + case InstArithmetic::Sdiv:
|
| + // TODO(stichnot): Enable this after doing better performance
|
| + // and cross testing.
|
| + if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
|
| + // Optimize division by constant power of 2, but not for Om1
|
| + // or O0, just to keep things simple there.
|
| + if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| + int32_t Divisor = C->getValue();
|
| + uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| + if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| + uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| + Type Ty = Dest->getType();
|
| + // LLVM does the following for dest=src/(1<<log):
|
| + // t=src
|
| + // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| + // shr t,typewidth-log
|
| + // add t,src
|
| + // sar t,log
|
| + // dest=t
|
| + uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
|
| + _mov(T, Src0);
|
| + // If for some reason we are dividing by 1, just treat it
|
| + // like an assignment.
|
| + if (LogDiv > 0) {
|
| + // The initial sar is unnecessary when dividing by 2.
|
| + if (LogDiv > 1)
|
| + _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
|
| + _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
|
| + _add(T, Src0);
|
| + _sar(T, Ctx->getConstantInt(Ty, LogDiv));
|
| + }
|
| + _mov(Dest, T);
|
| + return;
|
| + }
|
| }
|
| - _imul(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Shl:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _shl(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Lshr:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _shr(T, Src1);
|
| + }
|
| + Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| + if (isByteSizedArithType(Dest->getType())) {
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _cbwdq(T, T);
|
| + _idiv(T, Src1, T);
|
| _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Ashr:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _sar(T, Src1);
|
| + } else {
|
| + T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T, Src1, T_edx);
|
| _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Udiv:
|
| - // div and idiv are the few arithmetic operators that do not allow
|
| - // immediates as the operand.
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = nullptr;
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| - _div(T, Src1, T_ah);
|
| - _mov(Dest, T);
|
| - } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| - _div(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - case InstArithmetic::Sdiv:
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T, T);
|
| - _idiv(T, Src1, T);
|
| - _mov(Dest, T);
|
| - } else {
|
| - T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - case InstArithmetic::Urem:
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = nullptr;
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| - _div(T_ah, Src1, T);
|
| - _mov(Dest, T_ah);
|
| - } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _div(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| - }
|
| - break;
|
| - case InstArithmetic::Srem:
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T, T);
|
| - Context.insert(InstFakeDef::create(Func, T_ah));
|
| - _idiv(T_ah, Src1, T);
|
| - _mov(Dest, T_ah);
|
| - } else {
|
| - T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| + }
|
| + break;
|
| + case InstArithmetic::Urem:
|
| + Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| + if (isByteSizedArithType(Dest->getType())) {
|
| + Variable *T_ah = nullptr;
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| + _div(T_ah, Src1, T);
|
| + _mov(Dest, T_ah);
|
| + } else {
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| + _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _div(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + }
|
| + break;
|
| + case InstArithmetic::Srem:
|
| + // TODO(stichnot): Enable this after doing better performance
|
| + // and cross testing.
|
| + if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
|
| + // Optimize mod by constant power of 2, but not for Om1 or O0,
|
| + // just to keep things simple there.
|
| + if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| + int32_t Divisor = C->getValue();
|
| + uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| + if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| + uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| + Type Ty = Dest->getType();
|
| + // LLVM does the following for dest=src%(1<<log):
|
| + // t=src
|
| + // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| + // shr t,typewidth-log
|
| + // add t,src
|
| + // and t, -(1<<log)
|
| + // sub t,src
|
| + // neg t
|
| + // dest=t
|
| + uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
|
| + // If for some reason we are dividing by 1, just assign 0.
|
| + if (LogDiv == 0) {
|
| + _mov(Dest, Ctx->getConstantZero(Ty));
|
| + return;
|
| + }
|
| + _mov(T, Src0);
|
| + // The initial sar is unnecessary when dividing by 2.
|
| + if (LogDiv > 1)
|
| + _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
|
| + _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
|
| + _add(T, Src0);
|
| + _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
|
| + _sub(T, Src0);
|
| + _neg(T);
|
| + _mov(Dest, T);
|
| + return;
|
| + }
|
| }
|
| - break;
|
| - case InstArithmetic::Fadd:
|
| - _mov(T, Src0);
|
| - _addss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fsub:
|
| - _mov(T, Src0);
|
| - _subss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fmul:
|
| - _mov(T, Src0);
|
| - _mulss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fdiv:
|
| - _mov(T, Src0);
|
| - _divss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Frem: {
|
| - const SizeT MaxSrcs = 2;
|
| - Type Ty = Dest->getType();
|
| - InstCall *Call =
|
| - makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,
|
| - Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - return lowerCall(Call);
|
| - } break;
|
| }
|
| + Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| + if (isByteSizedArithType(Dest->getType())) {
|
| + Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _cbwdq(T, T);
|
| + Context.insert(InstFakeDef::create(Func, T_ah));
|
| + _idiv(T_ah, Src1, T);
|
| + _mov(Dest, T_ah);
|
| + } else {
|
| + T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| + _mov(T, Src0, RegX8632::Reg_eax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + }
|
| + break;
|
| + case InstArithmetic::Fadd:
|
| + _mov(T, Src0);
|
| + _addss(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Fsub:
|
| + _mov(T, Src0);
|
| + _subss(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Fmul:
|
| + _mov(T, Src0);
|
| + _mulss(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Fdiv:
|
| + _mov(T, Src0);
|
| + _divss(T, Src1);
|
| + _mov(Dest, T);
|
| + break;
|
| + case InstArithmetic::Frem: {
|
| + const SizeT MaxSrcs = 2;
|
| + Type Ty = Dest->getType();
|
| + InstCall *Call = makeHelperCall(
|
| + isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
|
| + Call->addArg(Src0);
|
| + Call->addArg(Src1);
|
| + return lowerCall(Call);
|
| + }
|
| }
|
| }
|
|
|
| @@ -3119,10 +3292,11 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| Func->setError("Unexpected memory ordering for AtomicRMW");
|
| return;
|
| }
|
| - lowerAtomicRMW(Instr->getDest(),
|
| - static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
|
| - Instr->getArg(0))->getValue()),
|
| - Instr->getArg(1), Instr->getArg(2));
|
| + lowerAtomicRMW(
|
| + Instr->getDest(),
|
| + static_cast<uint32_t>(
|
| + llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
|
| + Instr->getArg(1), Instr->getArg(2));
|
| return;
|
| case Intrinsics::AtomicStore: {
|
| if (!Intrinsics::isMemoryOrderValid(
|
|
|