Index: src/IceTargetLoweringX86BaseImpl.h |
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
index 01e2be0aca3697d2e6c71220c214587b8baccd0b..48c342f5f6770af2e0fc2c3577353e8749f2eeff 100644 |
--- a/src/IceTargetLoweringX86BaseImpl.h |
+++ b/src/IceTargetLoweringX86BaseImpl.h |
@@ -2488,7 +2488,8 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { |
Variable::NoRegister)) { |
XmmArgs.push_back(Arg); |
} else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM && |
- (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) { |
+ (Traits::getRegisterForXmmArgNum(XmmArgs.size()) != |
+ Variable::NoRegister)) { |
XmmArgs.push_back(Arg); |
} else if (isScalarIntegerType(Ty) && |
(Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) != |
@@ -4600,27 +4601,30 @@ void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty, |
// |
// Otherwise: |
// bsr IF_NOT_ZERO, Val |
- // mov T_DEST, 63 |
+ // mov T_DEST, ((Ty == i32) ? 63 : 127) |
// cmovne T_DEST, IF_NOT_ZERO |
- // xor T_DEST, 31 |
+ // xor T_DEST, ((Ty == i32) ? 31 : 63) |
// mov DEST, T_DEST |
// |
// NOTE: T_DEST must be a register because cmov requires its dest to be a |
// register. Also, bsf and bsr require their dest to be a register. |
// |
- // The xor DEST, 31 converts a bit position to # of leading zeroes. |
+ // The xor DEST, C(31|63) converts a bit position to # of leading zeroes. |
// E.g., for 000... 00001100, bsr will say that the most significant bit |
// set is at position 3, while the number of leading zeros is 28. Xor is |
- // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case). |
+ // like (M - N) for N <= M, and converts 63 to 32, and 127 to 64 (for the |
+ // all-zeros case). |
// |
- // Similar for 64-bit, but start w/ speculating that the upper 32 bits |
- // are all zero, and compute the result for that case (checking the lower |
- // 32 bits). Then actually compute the result for the upper bits and |
+ // X8632 only: Similar for 64-bit, but start w/ speculating that the upper 32 |
+ // bits are all zero, and compute the result for that case (checking the |
+ // lower 32 bits). Then actually compute the result for the upper bits and |
// cmov in the result from the lower computation if the earlier speculation |
// was correct. |
// |
// Cttz, is similar, but uses bsf instead, and doesn't require the xor |
// bit position conversion, and the speculation is reversed. |
+ |
+ // TODO(jpp): refactor this method. |
assert(Ty == IceType_i32 || Ty == IceType_i64); |
const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32; |
Variable *T = makeReg(DestTy); |
@@ -4633,15 +4637,32 @@ void TargetX86Base<TraitsType>::lowerCountZeros(bool Cttz, Type Ty, |
Variable *T_Dest = makeReg(DestTy); |
Constant *_31 = Ctx->getConstantInt32(31); |
Constant *_32 = Ctx->getConstantInt(DestTy, 32); |
+ Constant *_63 = Ctx->getConstantInt(DestTy, 63); |
+ Constant *_64 = Ctx->getConstantInt(DestTy, 64); |
if (Cttz) { |
- _mov(T_Dest, _32); |
+ if (DestTy == IceType_i64) { |
+ _mov(T_Dest, _64); |
+ } else { |
+ _mov(T_Dest, _32); |
+ } |
} else { |
- Constant *_63 = Ctx->getConstantInt(DestTy, 63); |
- _mov(T_Dest, _63); |
+ Constant *_127 = Ctx->getConstantInt(DestTy, 127); |
+ if (DestTy == IceType_i64) { |
+ _mov(T_Dest, _127); |
+ } else { |
+ _mov(T_Dest, _63); |
+ } |
} |
_cmov(T_Dest, T, Traits::Cond::Br_ne); |
if (!Cttz) { |
- _xor(T_Dest, _31); |
+ if (DestTy == IceType_i64) { |
+ // Even though there's a _63 available at this point, that constant might |
+ // not be an i32, which will cause the xor emission to fail. |
+ Constant *_63 = Ctx->getConstantInt32(63); |
+ _xor(T_Dest, _63); |
+ } else { |
+ _xor(T_Dest, _31); |
+ } |
} |
if (Traits::Is64Bit || Ty == IceType_i32) { |
_mov(Dest, T_Dest); |