| Index: src/IceTargetLoweringX86BaseImpl.h
|
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
|
| index b22ec6441ec82ba6e7f41bb2b401e7b10961344e..56ee04d32e6510add168fd47871608b65c2cc9e2 100644
|
| --- a/src/IceTargetLoweringX86BaseImpl.h
|
| +++ b/src/IceTargetLoweringX86BaseImpl.h
|
| @@ -1216,8 +1216,7 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
|
| // t1:ecx = c.lo & 0xff
|
| // t2 = b.lo
|
| // t3 = b.hi
|
| - T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| - _mov(T_1, Src1Lo);
|
| + T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
|
| _mov(T_2, Src0Lo);
|
| _mov(T_3, Src0Hi);
|
| switch (Op) {
|
| @@ -1295,6 +1294,7 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
|
| template <class Machine>
|
| void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| Variable *Dest = Inst->getDest();
|
| + Type Ty = Dest->getType();
|
| Operand *Src0 = legalize(Inst->getSrc(0));
|
| Operand *Src1 = legalize(Inst->getSrc(1));
|
| if (Inst->isCommutative()) {
|
| @@ -1316,7 +1316,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| assert(SwapCount <= 1);
|
| (void)SwapCount;
|
| }
|
| - if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| + if (!Traits::Is64Bit && Ty == IceType_i64) {
|
| // These x86-32 helper-call-involved instructions are lowered in this
|
| // separate switch. This is because loOperand() and hiOperand() may insert
|
| // redundant instructions for constant blinding and pooling. Such redundant
|
| @@ -1463,7 +1463,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| return;
|
| }
|
| - if (isVectorType(Dest->getType())) {
|
| + if (isVectorType(Ty)) {
|
| // TODO: Trap on integer divide and integer modulo by zero. See:
|
| // https://code.google.com/p/nativeclient/issues/detail?id=3899
|
| if (llvm::isa<typename Traits::X86OperandMem>(Src1))
|
| @@ -1473,46 +1473,45 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| llvm_unreachable("Unknown arithmetic operator");
|
| break;
|
| case InstArithmetic::Add: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _padd(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::And: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _pand(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Or: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _por(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Xor: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _pxor(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Sub: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _psub(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Mul: {
|
| - bool TypesAreValidForPmull =
|
| - Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
|
| + bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
|
| bool InstructionSetIsValidForPmull =
|
| - Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
|
| + Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
|
| if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _pmull(T, Src0 == Src1 ? T : Src1);
|
| _movp(Dest, T);
|
| - } else if (Dest->getType() == IceType_v4i32) {
|
| + } else if (Ty == IceType_v4i32) {
|
| // Lowering sequence:
|
| // Note: The mask arguments have index 0 on the left.
|
| //
|
| @@ -1550,7 +1549,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
|
| _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
|
| _movp(Dest, T4);
|
| - } else if (Dest->getType() == IceType_v16i8) {
|
| + } else if (Ty == IceType_v16i8) {
|
| scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| } else {
|
| llvm::report_fatal_error("Invalid vector multiply type");
|
| @@ -1566,25 +1565,25 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| break;
|
| case InstArithmetic::Fadd: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _addps(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Fsub: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _subps(T, Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Fmul: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _mulps(T, Src0 == Src1 ? T : Src1);
|
| _movp(Dest, T);
|
| } break;
|
| case InstArithmetic::Fdiv: {
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(Ty);
|
| _movp(T, Src0);
|
| _divps(T, Src1);
|
| _movp(Dest, T);
|
| @@ -1633,13 +1632,13 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| // The 8-bit version of imul only allows the form "imul r/m8" where T must
|
| // be in al.
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| + if (isByteSizedArithType(Ty)) {
|
| _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| _imul(T, Src0 == Src1 ? T : Src1);
|
| _mov(Dest, T);
|
| } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| - T = makeReg(Dest->getType());
|
| + T = makeReg(Ty);
|
| _imul_imm(T, Src0, ImmConst);
|
| _mov(Dest, T);
|
| } else {
|
| @@ -1650,76 +1649,51 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| break;
|
| case InstArithmetic::Shl:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| - Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| - _mov(Cl, Src1);
|
| - Src1 = Cl;
|
| - }
|
| + if (!llvm::isa<ConstantInteger32>(Src1))
|
| + Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
|
| _shl(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Lshr:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| - Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| - _mov(Cl, Src1);
|
| - Src1 = Cl;
|
| - }
|
| + if (!llvm::isa<ConstantInteger32>(Src1))
|
| + Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
|
| _shr(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Ashr:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| - Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| - _mov(Cl, Src1);
|
| - Src1 = Cl;
|
| - }
|
| + if (!llvm::isa<ConstantInteger32>(Src1))
|
| + Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
|
| _sar(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| - case InstArithmetic::Udiv:
|
| + case InstArithmetic::Udiv: {
|
| // div and idiv are the few arithmetic operators that do not allow
|
| // immediates as the operand.
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - // For 8-bit unsigned division we need to zero-extend al into ah. A mov
|
| - // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
|
| - // assembler refuses to encode %ah (encoding %spl with a REX prefix
|
| - // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
|
| - // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
|
| - // d[%lh], which means the X86 target lowering (and the register
|
| - // allocator) would have to be aware of this restriction. For now, we
|
| - // simply zero %eax completely, and move the dividend into %al.
|
| - Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| - Context.insert(InstFakeDef::create(Func, T_eax));
|
| - _xor(T_eax, T_eax);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| - _div(T, Src1, T);
|
| - _mov(Dest, T);
|
| - Context.insert(InstFakeUse::create(Func, T_eax));
|
| - } else {
|
| - Type Ty = Dest->getType();
|
| - uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| - uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| - switch (Ty) {
|
| - default:
|
| - llvm_unreachable("Bad type for udiv");
|
| - // fallthrough
|
| - case IceType_i32:
|
| - break;
|
| - case IceType_i16:
|
| - Eax = Traits::RegisterSet::Reg_ax;
|
| - Edx = Traits::RegisterSet::Reg_dx;
|
| - break;
|
| - }
|
| - Constant *Zero = Ctx->getConstantZero(Ty);
|
| - _mov(T, Src0, Eax);
|
| - _mov(T_edx, Zero, Edx);
|
| - _div(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| + uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| + uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for udiv");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + Edx = Traits::RegisterSet::Reg_dx;
|
| + break;
|
| + case IceType_i8:
|
| + Eax = Traits::RegisterSet::Reg_al;
|
| + Edx = Traits::RegisterSet::Reg_ah;
|
| + break;
|
| }
|
| - break;
|
| + _mov(T, Src0, Eax);
|
| + _mov(T_edx, Ctx->getConstantZero(Ty), Edx);
|
| + _div(T, Src1, T_edx);
|
| + _mov(Dest, T);
|
| + } break;
|
| case InstArithmetic::Sdiv:
|
| // TODO(stichnot): Enable this after doing better performance and cross
|
| // testing.
|
| @@ -1731,7 +1705,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| - Type Ty = Dest->getType();
|
| // LLVM does the following for dest=src/(1<<log):
|
| // t=src
|
| // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| @@ -1757,7 +1730,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| }
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - switch (Type Ty = Dest->getType()) {
|
| + switch (Ty) {
|
| default:
|
| llvm_unreachable("Bad type for sdiv");
|
| // fallthrough
|
| @@ -1778,47 +1751,32 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| _idiv(T, Src1, T_edx);
|
| _mov(Dest, T);
|
| break;
|
| - case InstArithmetic::Urem:
|
| + case InstArithmetic::Urem: {
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| - Context.insert(InstFakeDef::create(Func, T_eax));
|
| - _xor(T_eax, T_eax);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| - _div(T, Src1, T);
|
| - // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
|
| - // mov %ah, %al because it would make x86-64 codegen more complicated. If
|
| - // this ever becomes a problem we can introduce a pseudo rem instruction
|
| - // that returns the remainder in %al directly (and uses a mov for copying
|
| - // %ah to %al.)
|
| - static constexpr uint8_t AlSizeInBits = 8;
|
| - _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
|
| - _mov(Dest, T);
|
| - Context.insert(InstFakeUse::create(Func, T_eax));
|
| - } else {
|
| - Type Ty = Dest->getType();
|
| - uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| - uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| - switch (Ty) {
|
| - default:
|
| - llvm_unreachable("Bad type for urem");
|
| - // fallthrough
|
| - case IceType_i32:
|
| - break;
|
| - case IceType_i16:
|
| - Eax = Traits::RegisterSet::Reg_ax;
|
| - Edx = Traits::RegisterSet::Reg_dx;
|
| - break;
|
| - }
|
| - Constant *Zero = Ctx->getConstantZero(Ty);
|
| - T_edx = makeReg(Dest->getType(), Edx);
|
| - _mov(T_edx, Zero);
|
| - _mov(T, Src0, Eax);
|
| - _div(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| + uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| + uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for urem");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + Edx = Traits::RegisterSet::Reg_dx;
|
| + break;
|
| + case IceType_i8:
|
| + Eax = Traits::RegisterSet::Reg_al;
|
| + Edx = Traits::RegisterSet::Reg_ah;
|
| + break;
|
| }
|
| - break;
|
| - case InstArithmetic::Srem:
|
| + T_edx = makeReg(Ty, Edx);
|
| + _mov(T_edx, Ctx->getConstantZero(Ty));
|
| + _mov(T, Src0, Eax);
|
| + _div(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + } break;
|
| + case InstArithmetic::Srem: {
|
| // TODO(stichnot): Enable this after doing better performance and cross
|
| // testing.
|
| if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
|
| @@ -1829,7 +1787,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| - Type Ty = Dest->getType();
|
| // LLVM does the following for dest=src%(1<<log):
|
| // t=src
|
| // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| @@ -1860,37 +1817,29 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| }
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - switch (Type Ty = Dest->getType()) {
|
| + uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| + uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| + switch (Ty) {
|
| default:
|
| llvm_unreachable("Bad type for srem");
|
| // fallthrough
|
| case IceType_i32:
|
| - T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| break;
|
| case IceType_i16:
|
| - T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_ax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + Edx = Traits::RegisterSet::Reg_dx;
|
| break;
|
| case IceType_i8:
|
| - T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
|
| - // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
|
| - // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - static constexpr uint8_t AlSizeInBits = 8;
|
| - _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
|
| - _mov(Dest, T_edx);
|
| + Eax = Traits::RegisterSet::Reg_al;
|
| + Edx = Traits::RegisterSet::Reg_ah;
|
| break;
|
| }
|
| - break;
|
| + T_edx = makeReg(Ty, Edx);
|
| + _mov(T, Src0, Eax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + } break;
|
| case InstArithmetic::Fadd:
|
| _mov(T, Src0);
|
| _addss(T, Src1);
|
| @@ -1913,7 +1862,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| break;
|
| case InstArithmetic::Frem: {
|
| constexpr SizeT MaxSrcs = 2;
|
| - Type Ty = Dest->getType();
|
| InstCall *Call = makeHelperCall(
|
| isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
|
| Call->addArg(Src0);
|
| @@ -1991,6 +1939,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
|
| InstCast::OpKind CastKind = Inst->getCastKind();
|
| Variable *Dest = Inst->getDest();
|
| + Type DestTy = Dest->getType();
|
| switch (CastKind) {
|
| default:
|
| Func->setError("Cast type not supported");
|
| @@ -2003,15 +1952,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| // we're unlikely to see something like that in the bitcode that the
|
| // optimizer wouldn't have already taken care of.
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (isVectorType(Dest->getType())) {
|
| - Type DestTy = Dest->getType();
|
| + if (isVectorType(DestTy)) {
|
| if (DestTy == IceType_v16i8) {
|
| // onemask = materialize(1,1,...); dst = (src & onemask) > 0
|
| - Variable *OneMask = makeVectorOfOnes(Dest->getType());
|
| + Variable *OneMask = makeVectorOfOnes(DestTy);
|
| Variable *T = makeReg(DestTy);
|
| _movp(T, Src0RM);
|
| _pand(T, OneMask);
|
| - Variable *Zeros = makeVectorOfZeros(Dest->getType());
|
| + Variable *Zeros = makeVectorOfZeros(DestTy);
|
| _pcmpgt(T, Zeros);
|
| _movp(Dest, T);
|
| } else {
|
| @@ -2026,7 +1974,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _psra(T, ShiftConstant);
|
| _movp(Dest, T);
|
| }
|
| - } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| + } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
|
| // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
|
| Constant *Shift = Ctx->getConstantInt32(31);
|
| Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| @@ -2053,12 +2001,10 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| // shl t1, dst_bitwidth - 1
|
| // sar t1, dst_bitwidth - 1
|
| // dst = t1
|
| - size_t DestBits =
|
| - Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
|
| + size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
|
| Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
|
| - Variable *T = makeReg(Dest->getType());
|
| - if (typeWidthInBytes(Dest->getType()) <=
|
| - typeWidthInBytes(Src0RM->getType())) {
|
| + Variable *T = makeReg(DestTy);
|
| + if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
|
| _mov(T, Src0RM);
|
| } else {
|
| // Widen the source using movsx or movzx. (It doesn't matter which one,
|
| @@ -2070,7 +2016,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _mov(Dest, T);
|
| } else {
|
| // t1 = movsx src; dst = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _movsx(T, Src0RM);
|
| _mov(Dest, T);
|
| }
|
| @@ -2078,15 +2024,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| }
|
| case InstCast::Zext: {
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (isVectorType(Dest->getType())) {
|
| + if (isVectorType(DestTy)) {
|
| // onemask = materialize(1,1,...); dest = onemask & src
|
| - Type DestTy = Dest->getType();
|
| Variable *OneMask = makeVectorOfOnes(DestTy);
|
| Variable *T = makeReg(DestTy);
|
| _movp(T, Src0RM);
|
| _pand(T, OneMask);
|
| _movp(Dest, T);
|
| - } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| + } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
|
| // t1=movzx src; dst.lo=t1; dst.hi=0
|
| Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| @@ -2101,7 +2046,6 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _mov(DestHi, Zero);
|
| } else if (Src0RM->getType() == IceType_i1) {
|
| // t = Src0RM; Dest = t
|
| - Type DestTy = Dest->getType();
|
| Variable *T = nullptr;
|
| if (DestTy == IceType_i8) {
|
| _mov(T, Src0RM);
|
| @@ -2117,32 +2061,40 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _mov(Dest, T);
|
| } else {
|
| // t1 = movzx src; dst = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _movzx(T, Src0RM);
|
| _mov(Dest, T);
|
| }
|
| break;
|
| }
|
| case InstCast::Trunc: {
|
| - if (isVectorType(Dest->getType())) {
|
| + if (isVectorType(DestTy)) {
|
| // onemask = materialize(1,1,...); dst = src & onemask
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| Type Src0Ty = Src0RM->getType();
|
| Variable *OneMask = makeVectorOfOnes(Src0Ty);
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _movp(T, Src0RM);
|
| _pand(T, OneMask);
|
| _movp(Dest, T);
|
| + } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
|
| + // Make sure we truncate from and into valid registers.
|
| + Operand *Src0 = legalizeUndef(Inst->getSrc(0));
|
| + if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
|
| + Src0 = loOperand(Src0);
|
| + Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| + Variable *T = copyToReg8(Src0RM);
|
| + if (DestTy == IceType_i1)
|
| + _and(T, Ctx->getConstantInt1(1));
|
| + _mov(Dest, T);
|
| } else {
|
| Operand *Src0 = legalizeUndef(Inst->getSrc(0));
|
| if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
|
| Src0 = loOperand(Src0);
|
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| // t1 = trunc Src0RM; Dest = t1
|
| - Variable *T = nullptr;
|
| + Variable *T = makeReg(DestTy);
|
| _mov(T, Src0RM);
|
| - if (Dest->getType() == IceType_i1)
|
| - _and(T, Ctx->getConstantInt1(1));
|
| _mov(Dest, T);
|
| }
|
| break;
|
| @@ -2151,22 +2103,22 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| case InstCast::Fpext: {
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| // t1 = cvt Src0RM; Dest = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
|
| _mov(Dest, T);
|
| break;
|
| }
|
| case InstCast::Fptosi:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4i32 &&
|
| + if (isVectorType(DestTy)) {
|
| + assert(DestTy == IceType_v4i32 &&
|
| Inst->getSrc(0)->getType() == IceType_v4f32);
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
|
| Src0RM = legalizeToReg(Src0RM);
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
|
| _movp(Dest, T);
|
| - } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| + } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
|
| constexpr SizeT MaxSrcs = 1;
|
| Type SrcType = Inst->getSrc(0)->getType();
|
| InstCall *Call =
|
| @@ -2179,40 +2131,44 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
|
| Variable *T_1 = nullptr;
|
| - if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| + if (Traits::Is64Bit && DestTy == IceType_i64) {
|
| T_1 = makeReg(IceType_i64);
|
| } else {
|
| - assert(Dest->getType() != IceType_i64);
|
| + assert(DestTy != IceType_i64);
|
| T_1 = makeReg(IceType_i32);
|
| }
|
| // cvt() requires its integer argument to be a GPR.
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| + Variable *T_2 = makeReg(DestTy);
|
| + if (isByteSizedType(DestTy)) {
|
| + assert(T_1->getType() == IceType_i32);
|
| + T_1->setRegClass(RCX86_Is32To8);
|
| + T_2->setRegClass(RCX86_IsTrunc8Rcvr);
|
| + }
|
| _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
|
| _mov(T_2, T_1); // T_1 and T_2 may have different integer types
|
| - if (Dest->getType() == IceType_i1)
|
| + if (DestTy == IceType_i1)
|
| _and(T_2, Ctx->getConstantInt1(1));
|
| _mov(Dest, T_2);
|
| }
|
| break;
|
| case InstCast::Fptoui:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4i32 &&
|
| + if (isVectorType(DestTy)) {
|
| + assert(DestTy == IceType_v4i32 &&
|
| Inst->getSrc(0)->getType() == IceType_v4f32);
|
| constexpr SizeT MaxSrcs = 1;
|
| InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| lowerCall(Call);
|
| - } else if (Dest->getType() == IceType_i64 ||
|
| - (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
|
| + } else if (DestTy == IceType_i64 ||
|
| + (!Traits::Is64Bit && DestTy == IceType_i32)) {
|
| // Use a helper for both x86-32 and x86-64.
|
| constexpr SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| Type SrcType = Inst->getSrc(0)->getType();
|
| IceString TargetString;
|
| if (Traits::Is64Bit) {
|
| TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
|
| : H_fptoui_f64_i64;
|
| - } else if (isInt32Asserting32Or64(DestType)) {
|
| + } else if (isInt32Asserting32Or64(DestTy)) {
|
| TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
|
| : H_fptoui_f64_i32;
|
| } else {
|
| @@ -2226,39 +2182,43 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| } else {
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
|
| - assert(Dest->getType() != IceType_i64);
|
| + assert(DestTy != IceType_i64);
|
| Variable *T_1 = nullptr;
|
| - if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
|
| + if (Traits::Is64Bit && DestTy == IceType_i32) {
|
| T_1 = makeReg(IceType_i64);
|
| } else {
|
| - assert(Dest->getType() != IceType_i32);
|
| + assert(DestTy != IceType_i32);
|
| T_1 = makeReg(IceType_i32);
|
| }
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| + Variable *T_2 = makeReg(DestTy);
|
| + if (isByteSizedType(DestTy)) {
|
| + assert(T_1->getType() == IceType_i32);
|
| + T_1->setRegClass(RCX86_Is32To8);
|
| + T_2->setRegClass(RCX86_IsTrunc8Rcvr);
|
| + }
|
| _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
|
| _mov(T_2, T_1); // T_1 and T_2 may have different integer types
|
| - if (Dest->getType() == IceType_i1)
|
| + if (DestTy == IceType_i1)
|
| _and(T_2, Ctx->getConstantInt1(1));
|
| _mov(Dest, T_2);
|
| }
|
| break;
|
| case InstCast::Sitofp:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4f32 &&
|
| + if (isVectorType(DestTy)) {
|
| + assert(DestTy == IceType_v4f32 &&
|
| Inst->getSrc(0)->getType() == IceType_v4i32);
|
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
|
| Src0RM = legalizeToReg(Src0RM);
|
| - Variable *T = makeReg(Dest->getType());
|
| + Variable *T = makeReg(DestTy);
|
| _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
|
| _movp(Dest, T);
|
| } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
|
| // Use a helper for x86-32.
|
| constexpr SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| InstCall *Call =
|
| - makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
|
| - : H_sitofp_i64_f64,
|
| + makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
|
| + : H_sitofp_i64_f64,
|
| Dest, MaxSrcs);
|
| // TODO: Call the correct compiler-rt helper function.
|
| Call->addArg(Inst->getSrc(0));
|
| @@ -2275,7 +2235,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| assert(Src0RM->getType() != IceType_i64);
|
| T_1 = makeReg(IceType_i32);
|
| }
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| + Variable *T_2 = makeReg(DestTy);
|
| if (Src0RM->getType() == T_1->getType())
|
| _mov(T_1, Src0RM);
|
| else
|
| @@ -2287,8 +2247,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| case InstCast::Uitofp: {
|
| Operand *Src0 = Inst->getSrc(0);
|
| if (isVectorType(Src0->getType())) {
|
| - assert(Dest->getType() == IceType_v4f32 &&
|
| - Src0->getType() == IceType_v4i32);
|
| + assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);
|
| constexpr SizeT MaxSrcs = 1;
|
| InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
|
| Call->addArg(Src0);
|
| @@ -2298,14 +2257,13 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
|
| // x86-32.
|
| constexpr SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| IceString TargetString;
|
| if (isInt32Asserting32Or64(Src0->getType())) {
|
| - TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
|
| - : H_uitofp_i32_f64;
|
| + TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
|
| + : H_uitofp_i32_f64;
|
| } else {
|
| - TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
|
| - : H_uitofp_i64_f64;
|
| + TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
|
| + : H_uitofp_i64_f64;
|
| }
|
| InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
|
| Call->addArg(Src0);
|
| @@ -2323,7 +2281,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
|
| T_1 = makeReg(IceType_i32);
|
| }
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| + Variable *T_2 = makeReg(DestTy);
|
| if (Src0RM->getType() == T_1->getType())
|
| _mov(T_1, Src0RM);
|
| else
|
| @@ -2335,12 +2293,12 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| }
|
| case InstCast::Bitcast: {
|
| Operand *Src0 = Inst->getSrc(0);
|
| - if (Dest->getType() == Src0->getType()) {
|
| + if (DestTy == Src0->getType()) {
|
| InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
|
| lowerAssign(Assign);
|
| return;
|
| }
|
| - switch (Dest->getType()) {
|
| + switch (DestTy) {
|
| default:
|
| llvm_unreachable("Unexpected Bitcast dest type");
|
| case IceType_i8: {
|
| @@ -2358,11 +2316,9 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| case IceType_i32:
|
| case IceType_f32: {
|
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - Type DestType = Dest->getType();
|
| Type SrcType = Src0RM->getType();
|
| - (void)DestType;
|
| - assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
|
| - (DestType == IceType_f32 && SrcType == IceType_i32));
|
| + assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
|
| + (DestTy == IceType_f32 && SrcType == IceType_i32));
|
| // a.i32 = bitcast b.f32 ==>
|
| // t.f32 = b.f32
|
| // s.f32 = spill t.f32
|
| @@ -2436,7 +2392,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| } else {
|
| Src0 = legalize(Src0);
|
| if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
|
| - Variable *T = Func->makeVariable(Dest->getType());
|
| + Variable *T = Func->makeVariable(DestTy);
|
| _movq(T, Src0);
|
| _movq(Dest, T);
|
| break;
|
| @@ -3037,17 +2993,21 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
|
| legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
|
| Variable *T = makeReg(Ty);
|
| _movp(T, SourceVectRM);
|
| - if (Ty == IceType_v4f32)
|
| + if (Ty == IceType_v4f32) {
|
| _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
|
| - else
|
| - // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
|
| - // operand is a register, it must be a full r32 register like eax, and not
|
| - // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
|
| - // for the use of r16 and r8 by converting them through getBaseReg(),
|
| - // while emitIAS() validates that the original and base register encodings
|
| - // are the same. But for an "interior" register like ah, it should
|
| - // probably be copied into an r32 via movzx so that the types work out.
|
| + } else {
|
| + // For the pinsrb and pinsrw instructions, when the source operand is a
|
| + // register, it must be a full r32 register like eax, and not ax/al/ah.
|
| + // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
|
| + // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
|
| + // validates that the original and base register encodings are the same.
|
| + if (ElementRM->getType() == IceType_i8 &&
|
| + llvm::isa<Variable>(ElementRM)) {
|
| + // Don't use ah/bh/ch/dh for pinsrb.
|
| + ElementRM = copyToReg8(ElementRM);
|
| + }
|
| _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
|
| + }
|
| _movp(Inst->getDest(), T);
|
| } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| // Use shufps or movss.
|
| @@ -5354,6 +5314,67 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
|
| return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
|
| }
|
|
|
| +/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
|
| +/// Src is assumed to already be legalized. If the source operand is known to
|
| +/// be a memory or immediate operand, a simple mov will suffice. But if the
|
| +/// source operand can be a physical register, then it must first be copied into
|
| +/// a physical register that is truncable to 8-bit, then truncated into a
|
| +/// physical register that can receive a truncation, and finally copied into the
|
| +/// result 8-bit register (which in general can be any 8-bit register). For
|
| +/// example, moving %ebp into %ah may be accomplished as:
|
| +/// movl %ebp, %edx
|
| +/// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
|
| +/// movb %dl, %ah
|
| +/// On the other hand, moving a memory or immediate operand into ah:
|
| +/// movb 4(%ebp), %ah
|
| +/// movb $my_imm, %ah
|
| +///
|
| +/// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
|
| +/// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
|
| +/// use RegNum=NoRegister and then let the caller do a separate copy into
|
| +/// Reg_ah.
|
| +///
|
| +/// Note #2. ConstantRelocatable operands are also put through this process
|
| +/// (not truncated directly) because our ELF emitter does R_386_32 relocations
|
| +/// but not R_386_8 relocations.
|
| +///
|
| +/// Note #3. If Src is a Variable, the result will be an infinite-weight i8
|
| +/// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
|
| +/// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
|
| +/// to the pinsrb instruction.
|
| +template <class Machine>
|
| +Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
|
| + Type Ty = Src->getType();
|
| + assert(isScalarIntegerType(Ty));
|
| + assert(Ty != IceType_i1);
|
| + Variable *Reg = makeReg(IceType_i8, RegNum);
|
| + Reg->setRegClass(RCX86_IsTrunc8Rcvr);
|
| + if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
|
| + Variable *SrcTruncable = makeReg(Ty);
|
| + switch (Ty) {
|
| + case IceType_i64:
|
| + SrcTruncable->setRegClass(RCX86_Is64To8);
|
| + break;
|
| + case IceType_i32:
|
| + SrcTruncable->setRegClass(RCX86_Is32To8);
|
| + break;
|
| + case IceType_i16:
|
| + SrcTruncable->setRegClass(RCX86_Is16To8);
|
| + break;
|
| + default:
|
| + // i8 - just use default register class
|
| + break;
|
| + }
|
| + Variable *SrcRcvr = makeReg(IceType_i8);
|
| + SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
|
| + _mov(SrcTruncable, Src);
|
| + _mov(SrcRcvr, SrcTruncable);
|
| + Src = SrcRcvr;
|
| + }
|
| + _mov(Reg, Src);
|
| + return Reg;
|
| +}
|
| +
|
| /// Helper for legalize() to emit the right code to lower an operand to a
|
| /// register of the appropriate type.
|
| template <class Machine>
|
|
|