Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1427973003: Subzero: Refactor x86 register representation to actively use aliases. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Reformat Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTargetLoweringX86RegClass.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index b22ec6441ec82ba6e7f41bb2b401e7b10961344e..56ee04d32e6510add168fd47871608b65c2cc9e2 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1216,8 +1216,7 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
// t1:ecx = c.lo & 0xff
// t2 = b.lo
// t3 = b.hi
- T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(T_1, Src1Lo);
+ T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
_mov(T_2, Src0Lo);
_mov(T_3, Src0Hi);
switch (Op) {
@@ -1295,6 +1294,7 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
template <class Machine>
void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
+ Type Ty = Dest->getType();
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
if (Inst->isCommutative()) {
@@ -1316,7 +1316,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
assert(SwapCount <= 1);
(void)SwapCount;
}
- if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
// These x86-32 helper-call-involved instructions are lowered in this
// separate switch. This is because loOperand() and hiOperand() may insert
// redundant instructions for constant blinding and pooling. Such redundant
@@ -1463,7 +1463,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
}
return;
}
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(Ty)) {
// TODO: Trap on integer divide and integer modulo by zero. See:
// https://code.google.com/p/nativeclient/issues/detail?id=3899
if (llvm::isa<typename Traits::X86OperandMem>(Src1))
@@ -1473,46 +1473,45 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
llvm_unreachable("Unknown arithmetic operator");
break;
case InstArithmetic::Add: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_padd(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::And: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pand(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Or: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_por(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Xor: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pxor(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Sub: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_psub(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Mul: {
- bool TypesAreValidForPmull =
- Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
+ bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
bool InstructionSetIsValidForPmull =
- Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
+ Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_pmull(T, Src0 == Src1 ? T : Src1);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_v4i32) {
+ } else if (Ty == IceType_v4i32) {
// Lowering sequence:
// Note: The mask arguments have index 0 on the left.
//
@@ -1550,7 +1549,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
_shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
_pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
_movp(Dest, T4);
- } else if (Dest->getType() == IceType_v16i8) {
+ } else if (Ty == IceType_v16i8) {
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
} else {
llvm::report_fatal_error("Invalid vector multiply type");
@@ -1566,25 +1565,25 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
break;
case InstArithmetic::Fadd: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_addps(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fsub: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_subps(T, Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fmul: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_mulps(T, Src0 == Src1 ? T : Src1);
_movp(Dest, T);
} break;
case InstArithmetic::Fdiv: {
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(Ty);
_movp(T, Src0);
_divps(T, Src1);
_movp(Dest, T);
@@ -1633,13 +1632,13 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
}
// The 8-bit version of imul only allows the form "imul r/m8" where T must
// be in al.
- if (isByteSizedArithType(Dest->getType())) {
+ if (isByteSizedArithType(Ty)) {
_mov(T, Src0, Traits::RegisterSet::Reg_al);
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
_imul(T, Src0 == Src1 ? T : Src1);
_mov(Dest, T);
} else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
- T = makeReg(Dest->getType());
+ T = makeReg(Ty);
_imul_imm(T, Src0, ImmConst);
_mov(Dest, T);
} else {
@@ -1650,76 +1649,51 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
break;
case InstArithmetic::Shl:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_shl(T, Src1);
_mov(Dest, T);
break;
case InstArithmetic::Lshr:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_shr(T, Src1);
_mov(Dest, T);
break;
case InstArithmetic::Ashr:
_mov(T, Src0);
- if (!llvm::isa<ConstantInteger32>(Src1)) {
- Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
- _mov(Cl, Src1);
- Src1 = Cl;
- }
+ if (!llvm::isa<ConstantInteger32>(Src1))
+ Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
_sar(T, Src1);
_mov(Dest, T);
break;
- case InstArithmetic::Udiv:
+ case InstArithmetic::Udiv: {
// div and idiv are the few arithmetic operators that do not allow
// immediates as the operand.
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- // For 8-bit unsigned division we need to zero-extend al into ah. A mov
- // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
- // assembler refuses to encode %ah (encoding %spl with a REX prefix
- // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
- // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
- // d[%lh], which means the X86 target lowering (and the register
- // allocator) would have to be aware of this restriction. For now, we
- // simply zero %eax completely, and move the dividend into %al.
- Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- Context.insert(InstFakeDef::create(Func, T_eax));
- _xor(T_eax, T_eax);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _div(T, Src1, T);
- _mov(Dest, T);
- Context.insert(InstFakeUse::create(Func, T_eax));
- } else {
- Type Ty = Dest->getType();
- uint32_t Eax = Traits::RegisterSet::Reg_eax;
- uint32_t Edx = Traits::RegisterSet::Reg_edx;
- switch (Ty) {
- default:
- llvm_unreachable("Bad type for udiv");
- // fallthrough
- case IceType_i32:
- break;
- case IceType_i16:
- Eax = Traits::RegisterSet::Reg_ax;
- Edx = Traits::RegisterSet::Reg_dx;
- break;
- }
- Constant *Zero = Ctx->getConstantZero(Ty);
- _mov(T, Src0, Eax);
- _mov(T_edx, Zero, Edx);
- _div(T, Src1, T_edx);
- _mov(Dest, T);
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
+ default:
+ llvm_unreachable("Bad type for udiv");
+ // fallthrough
+ case IceType_i32:
+ break;
+ case IceType_i16:
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
+ break;
+ case IceType_i8:
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
+ break;
}
- break;
+ _mov(T, Src0, Eax);
+ _mov(T_edx, Ctx->getConstantZero(Ty), Edx);
+ _div(T, Src1, T_edx);
+ _mov(Dest, T);
+ } break;
case InstArithmetic::Sdiv:
// TODO(stichnot): Enable this after doing better performance and cross
// testing.
@@ -1731,7 +1705,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
// LLVM does the following for dest=src/(1<<log):
// t=src
// sar t,typewidth-1 // -1 if src is negative, 0 if not
@@ -1757,7 +1730,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
}
}
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- switch (Type Ty = Dest->getType()) {
+ switch (Ty) {
default:
llvm_unreachable("Bad type for sdiv");
// fallthrough
@@ -1778,47 +1751,32 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
_idiv(T, Src1, T_edx);
_mov(Dest, T);
break;
- case InstArithmetic::Urem:
+ case InstArithmetic::Urem: {
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
- Context.insert(InstFakeDef::create(Func, T_eax));
- _xor(T_eax, T_eax);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _div(T, Src1, T);
- // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
- // mov %ah, %al because it would make x86-64 codegen more complicated. If
- // this ever becomes a problem we can introduce a pseudo rem instruction
- // that returns the remainder in %al directly (and uses a mov for copying
- // %ah to %al.)
- static constexpr uint8_t AlSizeInBits = 8;
- _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
- _mov(Dest, T);
- Context.insert(InstFakeUse::create(Func, T_eax));
- } else {
- Type Ty = Dest->getType();
- uint32_t Eax = Traits::RegisterSet::Reg_eax;
- uint32_t Edx = Traits::RegisterSet::Reg_edx;
- switch (Ty) {
- default:
- llvm_unreachable("Bad type for urem");
- // fallthrough
- case IceType_i32:
- break;
- case IceType_i16:
- Eax = Traits::RegisterSet::Reg_ax;
- Edx = Traits::RegisterSet::Reg_dx;
- break;
- }
- Constant *Zero = Ctx->getConstantZero(Ty);
- T_edx = makeReg(Dest->getType(), Edx);
- _mov(T_edx, Zero);
- _mov(T, Src0, Eax);
- _div(T_edx, Src1, T);
- _mov(Dest, T_edx);
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
+ default:
+ llvm_unreachable("Bad type for urem");
+ // fallthrough
+ case IceType_i32:
+ break;
+ case IceType_i16:
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
+ break;
+ case IceType_i8:
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
+ break;
}
- break;
- case InstArithmetic::Srem:
+ T_edx = makeReg(Ty, Edx);
+ _mov(T_edx, Ctx->getConstantZero(Ty));
+ _mov(T, Src0, Eax);
+ _div(T_edx, Src1, T);
+ _mov(Dest, T_edx);
+ } break;
+ case InstArithmetic::Srem: {
// TODO(stichnot): Enable this after doing better performance and cross
// testing.
if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
@@ -1829,7 +1787,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
uint32_t UDivisor = static_cast<uint32_t>(Divisor);
if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
// LLVM does the following for dest=src%(1<<log):
// t=src
// sar t,typewidth-1 // -1 if src is negative, 0 if not
@@ -1860,37 +1817,29 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
}
}
Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- switch (Type Ty = Dest->getType()) {
+ uint32_t Eax = Traits::RegisterSet::Reg_eax;
+ uint32_t Edx = Traits::RegisterSet::Reg_edx;
+ switch (Ty) {
default:
llvm_unreachable("Bad type for srem");
// fallthrough
case IceType_i32:
- T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
- _mov(T, Src0, Traits::RegisterSet::Reg_eax);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- _mov(Dest, T_edx);
break;
case IceType_i16:
- T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
- _mov(T, Src0, Traits::RegisterSet::Reg_ax);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- _mov(Dest, T_edx);
+ Eax = Traits::RegisterSet::Reg_ax;
+ Edx = Traits::RegisterSet::Reg_dx;
break;
case IceType_i8:
- T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
- // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
- // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
- _mov(T, Src0, Traits::RegisterSet::Reg_al);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- static constexpr uint8_t AlSizeInBits = 8;
- _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
- _mov(Dest, T_edx);
+ Eax = Traits::RegisterSet::Reg_al;
+ Edx = Traits::RegisterSet::Reg_ah;
break;
}
- break;
+ T_edx = makeReg(Ty, Edx);
+ _mov(T, Src0, Eax);
+ _cbwdq(T_edx, T);
+ _idiv(T_edx, Src1, T);
+ _mov(Dest, T_edx);
+ } break;
case InstArithmetic::Fadd:
_mov(T, Src0);
_addss(T, Src1);
@@ -1913,7 +1862,6 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
break;
case InstArithmetic::Frem: {
constexpr SizeT MaxSrcs = 2;
- Type Ty = Dest->getType();
InstCall *Call = makeHelperCall(
isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
Call->addArg(Src0);
@@ -1991,6 +1939,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
InstCast::OpKind CastKind = Inst->getCastKind();
Variable *Dest = Inst->getDest();
+ Type DestTy = Dest->getType();
switch (CastKind) {
default:
Func->setError("Cast type not supported");
@@ -2003,15 +1952,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// we're unlikely to see something like that in the bitcode that the
// optimizer wouldn't have already taken care of.
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
- Type DestTy = Dest->getType();
+ if (isVectorType(DestTy)) {
if (DestTy == IceType_v16i8) {
// onemask = materialize(1,1,...); dst = (src & onemask) > 0
- Variable *OneMask = makeVectorOfOnes(Dest->getType());
+ Variable *OneMask = makeVectorOfOnes(DestTy);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
- Variable *Zeros = makeVectorOfZeros(Dest->getType());
+ Variable *Zeros = makeVectorOfZeros(DestTy);
_pcmpgt(T, Zeros);
_movp(Dest, T);
} else {
@@ -2026,7 +1974,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_psra(T, ShiftConstant);
_movp(Dest, T);
}
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
// t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Constant *Shift = Ctx->getConstantInt32(31);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -2053,12 +2001,10 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// shl t1, dst_bitwidth - 1
// sar t1, dst_bitwidth - 1
// dst = t1
- size_t DestBits =
- Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
+ size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
- Variable *T = makeReg(Dest->getType());
- if (typeWidthInBytes(Dest->getType()) <=
- typeWidthInBytes(Src0RM->getType())) {
+ Variable *T = makeReg(DestTy);
+ if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
_mov(T, Src0RM);
} else {
// Widen the source using movsx or movzx. (It doesn't matter which one,
@@ -2070,7 +2016,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_mov(Dest, T);
} else {
// t1 = movsx src; dst = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movsx(T, Src0RM);
_mov(Dest, T);
}
@@ -2078,15 +2024,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
}
case InstCast::Zext: {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
// onemask = materialize(1,1,...); dest = onemask & src
- Type DestTy = Dest->getType();
Variable *OneMask = makeVectorOfOnes(DestTy);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
// t1=movzx src; dst.lo=t1; dst.hi=0
Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -2101,7 +2046,6 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_mov(DestHi, Zero);
} else if (Src0RM->getType() == IceType_i1) {
// t = Src0RM; Dest = t
- Type DestTy = Dest->getType();
Variable *T = nullptr;
if (DestTy == IceType_i8) {
_mov(T, Src0RM);
@@ -2117,32 +2061,40 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_mov(Dest, T);
} else {
// t1 = movzx src; dst = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movzx(T, Src0RM);
_mov(Dest, T);
}
break;
}
case InstCast::Trunc: {
- if (isVectorType(Dest->getType())) {
+ if (isVectorType(DestTy)) {
// onemask = materialize(1,1,...); dst = src & onemask
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
Type Src0Ty = Src0RM->getType();
Variable *OneMask = makeVectorOfOnes(Src0Ty);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
+ } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
+ // Make sure we truncate from and into valid registers.
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0));
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
+ Src0 = loOperand(Src0);
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Variable *T = copyToReg8(Src0RM);
+ if (DestTy == IceType_i1)
+ _and(T, Ctx->getConstantInt1(1));
+ _mov(Dest, T);
} else {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
- Variable *T = nullptr;
+ Variable *T = makeReg(DestTy);
_mov(T, Src0RM);
- if (Dest->getType() == IceType_i1)
- _and(T, Ctx->getConstantInt1(1));
_mov(Dest, T);
}
break;
@@ -2151,22 +2103,22 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
case InstCast::Fpext: {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1 = cvt Src0RM; Dest = t1
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
_mov(Dest, T);
break;
}
case InstCast::Fptosi:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
- } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
constexpr SizeT MaxSrcs = 1;
Type SrcType = Inst->getSrc(0)->getType();
InstCall *Call =
@@ -2179,40 +2131,44 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
Variable *T_1 = nullptr;
- if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ if (Traits::Is64Bit && DestTy == IceType_i64) {
T_1 = makeReg(IceType_i64);
} else {
- assert(Dest->getType() != IceType_i64);
+ assert(DestTy != IceType_i64);
T_1 = makeReg(IceType_i32);
}
// cvt() requires its integer argument to be a GPR.
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
+ if (isByteSizedType(DestTy)) {
+ assert(T_1->getType() == IceType_i32);
+ T_1->setRegClass(RCX86_Is32To8);
+ T_2->setRegClass(RCX86_IsTrunc8Rcvr);
+ }
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
+ if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
break;
case InstCast::Fptoui:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4i32 &&
Inst->getSrc(0)->getType() == IceType_v4f32);
constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
- } else if (Dest->getType() == IceType_i64 ||
- (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
+ } else if (DestTy == IceType_i64 ||
+ (!Traits::Is64Bit && DestTy == IceType_i32)) {
// Use a helper for both x86-32 and x86-64.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
Type SrcType = Inst->getSrc(0)->getType();
IceString TargetString;
if (Traits::Is64Bit) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
: H_fptoui_f64_i64;
- } else if (isInt32Asserting32Or64(DestType)) {
+ } else if (isInt32Asserting32Or64(DestTy)) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
: H_fptoui_f64_i32;
} else {
@@ -2226,39 +2182,43 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- assert(Dest->getType() != IceType_i64);
+ assert(DestTy != IceType_i64);
Variable *T_1 = nullptr;
- if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
+ if (Traits::Is64Bit && DestTy == IceType_i32) {
T_1 = makeReg(IceType_i64);
} else {
- assert(Dest->getType() != IceType_i32);
+ assert(DestTy != IceType_i32);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
+ if (isByteSizedType(DestTy)) {
+ assert(T_1->getType() == IceType_i32);
+ T_1->setRegClass(RCX86_Is32To8);
+ T_2->setRegClass(RCX86_IsTrunc8Rcvr);
+ }
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
+ if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
break;
case InstCast::Sitofp:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
+ if (isVectorType(DestTy)) {
+ assert(DestTy == IceType_v4f32 &&
Inst->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
- Variable *T = makeReg(Dest->getType());
+ Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
} else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
InstCall *Call =
- makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
- : H_sitofp_i64_f64,
+ makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
+ : H_sitofp_i64_f64,
Dest, MaxSrcs);
// TODO: Call the correct compiler-rt helper function.
Call->addArg(Inst->getSrc(0));
@@ -2275,7 +2235,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
assert(Src0RM->getType() != IceType_i64);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
@@ -2287,8 +2247,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
case InstCast::Uitofp: {
Operand *Src0 = Inst->getSrc(0);
if (isVectorType(Src0->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
- Src0->getType() == IceType_v4i32);
+ assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);
constexpr SizeT MaxSrcs = 1;
InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
Call->addArg(Src0);
@@ -2298,14 +2257,13 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// Use a helper for x86-32 and x86-64. Also use a helper for i32 on
// x86-32.
constexpr SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
IceString TargetString;
if (isInt32Asserting32Or64(Src0->getType())) {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
- : H_uitofp_i32_f64;
+ TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
+ : H_uitofp_i32_f64;
} else {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
- : H_uitofp_i64_f64;
+ TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
+ : H_uitofp_i64_f64;
}
InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
Call->addArg(Src0);
@@ -2323,7 +2281,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
T_1 = makeReg(IceType_i32);
}
- Variable *T_2 = makeReg(Dest->getType());
+ Variable *T_2 = makeReg(DestTy);
if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
@@ -2335,12 +2293,12 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
}
case InstCast::Bitcast: {
Operand *Src0 = Inst->getSrc(0);
- if (Dest->getType() == Src0->getType()) {
+ if (DestTy == Src0->getType()) {
InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
lowerAssign(Assign);
return;
}
- switch (Dest->getType()) {
+ switch (DestTy) {
default:
llvm_unreachable("Unexpected Bitcast dest type");
case IceType_i8: {
@@ -2358,11 +2316,9 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
case IceType_i32:
case IceType_f32: {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- Type DestType = Dest->getType();
Type SrcType = Src0RM->getType();
- (void)DestType;
- assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
- (DestType == IceType_f32 && SrcType == IceType_i32));
+ assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
+ (DestTy == IceType_f32 && SrcType == IceType_i32));
// a.i32 = bitcast b.f32 ==>
// t.f32 = b.f32
// s.f32 = spill t.f32
@@ -2436,7 +2392,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
} else {
Src0 = legalize(Src0);
if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
- Variable *T = Func->makeVariable(Dest->getType());
+ Variable *T = Func->makeVariable(DestTy);
_movq(T, Src0);
_movq(Dest, T);
break;
@@ -3037,17 +2993,21 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
Variable *T = makeReg(Ty);
_movp(T, SourceVectRM);
- if (Ty == IceType_v4f32)
+ if (Ty == IceType_v4f32) {
_insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
- else
- // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
- // operand is a register, it must be a full r32 register like eax, and not
- // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
- // for the use of r16 and r8 by converting them through getBaseReg(),
- // while emitIAS() validates that the original and base register encodings
- // are the same. But for an "interior" register like ah, it should
- // probably be copied into an r32 via movzx so that the types work out.
+ } else {
+ // For the pinsrb and pinsrw instructions, when the source operand is a
+ // register, it must be a full r32 register like eax, and not ax/al/ah.
+ // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
+ // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
+ // validates that the original and base register encodings are the same.
+ if (ElementRM->getType() == IceType_i8 &&
+ llvm::isa<Variable>(ElementRM)) {
+ // Don't use ah/bh/ch/dh for pinsrb.
+ ElementRM = copyToReg8(ElementRM);
+ }
_pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
+ }
_movp(Inst->getDest(), T);
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use shufps or movss.
@@ -5354,6 +5314,67 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
}
+/// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
+/// Src is assumed to already be legalized. If the source operand is known to
+/// be a memory or immediate operand, a simple mov will suffice. But if the
+/// source operand can be a physical register, then it must first be copied into
+/// a physical register that is truncable to 8-bit, then truncated into a
+/// physical register that can receive a truncation, and finally copied into the
+/// result 8-bit register (which in general can be any 8-bit register). For
+/// example, moving %ebp into %ah may be accomplished as:
+/// movl %ebp, %edx
+/// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
+/// movb %dl, %ah
+/// On the other hand, moving a memory or immediate operand into ah:
+/// movb 4(%ebp), %ah
+/// movb $my_imm, %ah
+///
+/// Src must have a scalar integer type other than i1 (both are asserted).
+/// RegNum is forwarded to makeReg() for the result register; callers that do
+/// not care which 8-bit register is used omit it (presumably it defaults to
+/// NoRegister in the declaration, which is not visible here -- confirm).
+/// Returns the newly created i8 result Variable.
+///
+/// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
+/// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
+/// use RegNum=NoRegister and then let the caller do a separate copy into
+/// Reg_ah.
+///
+/// Note #2. ConstantRelocatable operands are also put through this process
+/// (not truncated directly) because our ELF emitter does R_386_32 relocations
+/// but not R_386_8 relocations.
+///
+/// Note #3. If Src is a Variable, the result will be an infinite-weight i8
+/// Variable with the RCX86_IsTrunc8Rcvr register class (the code below applies
+/// that register class to the result unconditionally, whatever kind of operand
+/// Src is). As such, this helper is a convenient way to prevent ah/bh/ch/dh
+/// from being an (invalid) argument to the pinsrb instruction.
+template <class Machine>
+Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
+ Type Ty = Src->getType();
+ // Only scalar integer sources are supported, and i1 is explicitly rejected.
+ assert(isScalarIntegerType(Ty));
+ assert(Ty != IceType_i1);
+ // Reg is the final 8-bit destination returned to the caller.
+ Variable *Reg = makeReg(IceType_i8, RegNum);
+ Reg->setRegClass(RCX86_IsTrunc8Rcvr);
+ // Variables and relocatable constants take the three-step path described
+ // above; every other operand kind falls through to the single mov below.
+ if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
+ // Step 1 target: a register of the source's own type, restricted to the
+ // register class whose members can be truncated to 8 bits.
+ Variable *SrcTruncable = makeReg(Ty);
+ switch (Ty) {
+ case IceType_i64:
+ SrcTruncable->setRegClass(RCX86_Is64To8);
+ break;
+ case IceType_i32:
+ SrcTruncable->setRegClass(RCX86_Is32To8);
+ break;
+ case IceType_i16:
+ SrcTruncable->setRegClass(RCX86_Is16To8);
+ break;
+ default:
+ // i8 - just use default register class
+ break;
+ }
+ // Step 2 target: an 8-bit register that is allowed to receive a truncation.
+ Variable *SrcRcvr = makeReg(IceType_i8);
+ SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
+ // Emit the full-width copy followed by the truncating copy, in that order.
+ _mov(SrcTruncable, Src);
+ _mov(SrcRcvr, SrcTruncable);
+ // From here on the truncated value stands in for the original source.
+ Src = SrcRcvr;
+ }
+ // Final copy into the result register (either from the truncated temporary
+ // or directly from the original memory/immediate operand).
+ _mov(Reg, Src);
+ return Reg;
+}
+
/// Helper for legalize() to emit the right code to lower an operand to a
/// register of the appropriate type.
template <class Machine>
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTargetLoweringX86RegClass.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698