| Index: src/IceTargetLoweringX86BaseImpl.h
|
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
|
| index 8739c77ac8e99351c19df3736faed4789cbc5be1..59fdf135e7305afa97dc37ed181834c79936426e 100644
|
| --- a/src/IceTargetLoweringX86BaseImpl.h
|
| +++ b/src/IceTargetLoweringX86BaseImpl.h
|
| @@ -587,7 +587,7 @@ template <class Machine> void TargetX86Base<Machine>::findRMW() {
|
| // Converts a ConstantInteger32 operand into its constant value, or
|
| // MemoryOrderInvalid if the operand is not a ConstantInteger32.
|
| inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
|
| - if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
|
| + if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
|
| return Integer->getValue();
|
| return Intrinsics::MemoryOrderInvalid;
|
| }
|
| @@ -622,7 +622,7 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
|
| if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
|
| // An InstLoad always qualifies.
|
| LoadDest = Load->getDest();
|
| - const bool DoLegalize = false;
|
| + constexpr bool DoLegalize = false;
|
| LoadSrc = formMemoryOperand(Load->getSourceAddress(),
|
| LoadDest->getType(), DoLegalize);
|
| } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
|
| @@ -635,7 +635,7 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
|
| Intrinsics::isMemoryOrderValid(
|
| ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
|
| LoadDest = Intrin->getDest();
|
| - const bool DoLegalize = false;
|
| + constexpr bool DoLegalize = false;
|
| LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
|
| DoLegalize);
|
| }
|
| @@ -733,8 +733,8 @@ Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
|
| }
|
|
|
| template <class Machine>
|
| -IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
|
| - return Traits::getRegName(RegNum, Ty);
|
| +IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {
|
| + return Traits::getRegName(RegNum);
|
| }
|
|
|
| template <class Machine>
|
| @@ -797,8 +797,7 @@ TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
|
| if (!hasFramePointer())
|
| Offset += getStackAdjustment();
|
| }
|
| - return typename Traits::Address(
|
| - Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
|
| + return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset);
|
| }
|
|
|
| /// Helper function for addProlog().
|
| @@ -1047,23 +1046,23 @@ bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
|
| return false;
|
| // Limit the number of lea/shl operations for a single multiply, to a
|
| // somewhat arbitrary choice of 3.
|
| - const uint32_t MaxOpsForOptimizedMul = 3;
|
| + constexpr uint32_t MaxOpsForOptimizedMul = 3;
|
| if (CountOps > MaxOpsForOptimizedMul)
|
| return false;
|
| _mov(T, Src0);
|
| Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| for (uint32_t i = 0; i < Count9; ++i) {
|
| - const uint16_t Shift = 3; // log2(9-1)
|
| + constexpr uint16_t Shift = 3; // log2(9-1)
|
| _lea(T,
|
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
|
| }
|
| for (uint32_t i = 0; i < Count5; ++i) {
|
| - const uint16_t Shift = 2; // log2(5-1)
|
| + constexpr uint16_t Shift = 2; // log2(5-1)
|
| _lea(T,
|
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
|
| }
|
| for (uint32_t i = 0; i < Count3; ++i) {
|
| - const uint16_t Shift = 1; // log2(3-1)
|
| + constexpr uint16_t Shift = 1; // log2(3-1)
|
| _lea(T,
|
| Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
|
| }
|
| @@ -1215,7 +1214,8 @@ void TargetX86Base<Machine>::lowerShift64(InstArithmetic::OpKind Op,
|
| // t1:ecx = c.lo & 0xff
|
| // t2 = b.lo
|
| // t3 = b.hi
|
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
|
| + T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| + _mov(T_1, Src1Lo);
|
| _mov(T_2, Src0Lo);
|
| _mov(T_3, Src0Hi);
|
| switch (Op) {
|
| @@ -1323,7 +1323,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| // and hiOperand() to be used.
|
| switch (Inst->getOp()) {
|
| case InstArithmetic::Udiv: {
|
| - const SizeT MaxSrcs = 2;
|
| + constexpr SizeT MaxSrcs = 2;
|
| InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| Call->addArg(Inst->getSrc(1));
|
| @@ -1331,7 +1331,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| return;
|
| }
|
| case InstArithmetic::Sdiv: {
|
| - const SizeT MaxSrcs = 2;
|
| + constexpr SizeT MaxSrcs = 2;
|
| InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| Call->addArg(Inst->getSrc(1));
|
| @@ -1339,7 +1339,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| return;
|
| }
|
| case InstArithmetic::Urem: {
|
| - const SizeT MaxSrcs = 2;
|
| + constexpr SizeT MaxSrcs = 2;
|
| InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| Call->addArg(Inst->getSrc(1));
|
| @@ -1347,7 +1347,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| return;
|
| }
|
| case InstArithmetic::Srem: {
|
| - const SizeT MaxSrcs = 2;
|
| + constexpr SizeT MaxSrcs = 2;
|
| InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| Call->addArg(Inst->getSrc(1));
|
| @@ -1528,14 +1528,14 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
|
|
| // Mask that directs pshufd to create a vector with entries
|
| // Src[1, 0, 3, 0]
|
| - const unsigned Constant1030 = 0x31;
|
| + constexpr unsigned Constant1030 = 0x31;
|
| Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
|
| // Mask that directs shufps to create a vector with entries
|
| // Dest[0, 2], Src[0, 2]
|
| - const unsigned Mask0202 = 0x88;
|
| + constexpr unsigned Mask0202 = 0x88;
|
| // Mask that directs pshufd to create a vector with entries
|
| // Src[0, 2, 1, 3]
|
| - const unsigned Mask0213 = 0xd8;
|
| + constexpr unsigned Mask0213 = 0xd8;
|
| Variable *T1 = makeReg(IceType_v4i32);
|
| Variable *T2 = makeReg(IceType_v4i32);
|
| Variable *T3 = makeReg(IceType_v4i32);
|
| @@ -1630,9 +1630,9 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| return;
|
| }
|
| // The 8-bit version of imul only allows the form "imul r/m8" where T must
|
| - // be in eax.
|
| + // be in al.
|
| if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| _imul(T, Src0 == Src1 ? T : Src1);
|
| _mov(Dest, T);
|
| @@ -1648,22 +1648,31 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| break;
|
| case InstArithmetic::Shl:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1))
|
| - Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
|
| + if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| + Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| + _mov(Cl, Src1);
|
| + Src1 = Cl;
|
| + }
|
| _shl(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Lshr:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1))
|
| - Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
|
| + if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| + Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| + _mov(Cl, Src1);
|
| + Src1 = Cl;
|
| + }
|
| _shr(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Ashr:
|
| _mov(T, Src0);
|
| - if (!llvm::isa<ConstantInteger32>(Src1))
|
| - Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);
|
| + if (!llvm::isa<ConstantInteger32>(Src1)) {
|
| + Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
|
| + _mov(Cl, Src1);
|
| + Src1 = Cl;
|
| + }
|
| _sar(T, Src1);
|
| _mov(Dest, T);
|
| break;
|
| @@ -1683,14 +1692,28 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| Context.insert(InstFakeDef::create(Func, T_eax));
|
| _xor(T_eax, T_eax);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| _div(T, Src1, T);
|
| _mov(Dest, T);
|
| Context.insert(InstFakeUse::create(Func, T_eax));
|
| } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| - _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
|
| + Type Ty = Dest->getType();
|
| + uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| + uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for udiv");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + Edx = Traits::RegisterSet::Reg_dx;
|
| + break;
|
| + }
|
| + Constant *Zero = Ctx->getConstantZero(Ty);
|
| + _mov(T, Src0, Eax);
|
| + _mov(T_edx, Zero, Edx);
|
| _div(T, Src1, T_edx);
|
| _mov(Dest, T);
|
| }
|
| @@ -1732,18 +1755,26 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| }
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| - _cbwdq(T, T);
|
| - _idiv(T, Src1, T);
|
| - _mov(Dest, T);
|
| - } else {
|
| - T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
|
| + switch (Type Ty = Dest->getType()) {
|
| + default:
|
| + llvm_unreachable("Bad type for sdiv");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
|
| _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| + break;
|
| + case IceType_i16:
|
| + T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_ax);
|
| + break;
|
| + case IceType_i8:
|
| + T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| + break;
|
| }
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T, Src1, T_edx);
|
| + _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Urem:
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| @@ -1751,7 +1782,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| Context.insert(InstFakeDef::create(Func, T_eax));
|
| _xor(T_eax, T_eax);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| _div(T, Src1, T);
|
| // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
|
| // mov %ah, %al because it would make x86-64 codegen more complicated. If
|
| @@ -1763,10 +1794,24 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| _mov(Dest, T);
|
| Context.insert(InstFakeUse::create(Func, T_eax));
|
| } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
|
| + Type Ty = Dest->getType();
|
| + uint32_t Eax = Traits::RegisterSet::Reg_eax;
|
| + uint32_t Edx = Traits::RegisterSet::Reg_edx;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for urem");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + Edx = Traits::RegisterSet::Reg_dx;
|
| + break;
|
| + }
|
| + Constant *Zero = Ctx->getConstantZero(Ty);
|
| + T_edx = makeReg(Dest->getType(), Edx);
|
| _mov(T_edx, Zero);
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| + _mov(T, Src0, Eax);
|
| _div(T_edx, Src1, T);
|
| _mov(Dest, T_edx);
|
| }
|
| @@ -1813,28 +1858,35 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| }
|
| }
|
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| - // T is %al.
|
| - _cbwdq(T, T);
|
| - _idiv(T, Src1, T);
|
| - Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| - Context.insert(InstFakeDef::create(Func, T_eax));
|
| - // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
|
| - // mov %ah, %al because it would make x86-64 codegen more complicated. If
|
| - // this ever becomes a problem we can introduce a pseudo rem instruction
|
| - // that returns the remainder in %al directly (and uses a mov for copying
|
| - // %ah to %al.)
|
| - static constexpr uint8_t AlSizeInBits = 8;
|
| - _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
|
| - _mov(Dest, T);
|
| - Context.insert(InstFakeUse::create(Func, T_eax));
|
| - } else {
|
| - T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
|
| + switch (Type Ty = Dest->getType()) {
|
| + default:
|
| + llvm_unreachable("Bad type for srem");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
|
| _mov(T, Src0, Traits::RegisterSet::Reg_eax);
|
| _cbwdq(T_edx, T);
|
| _idiv(T_edx, Src1, T);
|
| _mov(Dest, T_edx);
|
| + break;
|
| + case IceType_i16:
|
| + T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_ax);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T_edx, Src1, T);
|
| + _mov(Dest, T_edx);
|
| + break;
|
| + case IceType_i8:
|
| + T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
|
| + // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
|
| + // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
|
| + _mov(T, Src0, Traits::RegisterSet::Reg_al);
|
| + _cbwdq(T_edx, T);
|
| + _idiv(T_edx, Src1, T);
|
| + static constexpr uint8_t AlSizeInBits = 8;
|
| + _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
|
| + _mov(Dest, T_edx);
|
| + break;
|
| }
|
| break;
|
| case InstArithmetic::Fadd:
|
| @@ -1858,7 +1910,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
|
| _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Frem: {
|
| - const SizeT MaxSrcs = 2;
|
| + constexpr SizeT MaxSrcs = 2;
|
| Type Ty = Dest->getType();
|
| InstCall *Call = makeHelperCall(
|
| isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
|
| @@ -2113,7 +2165,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
|
| _movp(Dest, T);
|
| } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| Type SrcType = Inst->getSrc(0)->getType();
|
| InstCall *Call =
|
| makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
|
| @@ -2144,14 +2196,14 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| if (isVectorType(Dest->getType())) {
|
| assert(Dest->getType() == IceType_v4i32 &&
|
| Inst->getSrc(0)->getType() == IceType_v4f32);
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
|
| Call->addArg(Inst->getSrc(0));
|
| lowerCall(Call);
|
| } else if (Dest->getType() == IceType_i64 ||
|
| (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
|
| // Use a helper for both x86-32 and x86-64.
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| Type DestType = Dest->getType();
|
| Type SrcType = Inst->getSrc(0)->getType();
|
| IceString TargetString;
|
| @@ -2200,7 +2252,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| _movp(Dest, T);
|
| } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
|
| // Use a helper for x86-32.
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| Type DestType = Dest->getType();
|
| InstCall *Call =
|
| makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
|
| @@ -2235,7 +2287,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| if (isVectorType(Src0->getType())) {
|
| assert(Dest->getType() == IceType_v4f32 &&
|
| Src0->getType() == IceType_v4i32);
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
|
| Call->addArg(Src0);
|
| lowerCall(Call);
|
| @@ -2243,7 +2295,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
|
| (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
|
| // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
|
| // x86-32.
|
| - const SizeT MaxSrcs = 1;
|
| + constexpr SizeT MaxSrcs = 1;
|
| Type DestType = Dest->getType();
|
| IceString TargetString;
|
| if (isInt32Asserting32Or64(Src0->getType())) {
|
| @@ -2459,13 +2511,17 @@ void TargetX86Base<Machine>::lowerExtractElement(
|
| Type Ty = SourceVectNotLegalized->getType();
|
| Type ElementTy = typeElementType(Ty);
|
| Type InVectorElementTy = Traits::getInVectorElementType(Ty);
|
| - Variable *ExtractedElementR = makeReg(InVectorElementTy);
|
|
|
| // TODO(wala): Determine the best lowering sequences for each type.
|
| bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
|
| - InstructionSet >= Traits::SSE4_1;
|
| - if (CanUsePextr && Ty != IceType_v4f32) {
|
| - // Use pextrb, pextrw, or pextrd.
|
| + (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
|
| + Variable *ExtractedElementR =
|
| + makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
|
| + if (CanUsePextr) {
|
| + // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
|
| + // bits of the destination register, so we represent this by always
|
| + // extracting into an i32 register. The _mov into Dest below will do
|
| + // truncation as necessary.
|
| Constant *Mask = Ctx->getConstantInt32(Index);
|
| Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
|
| _pextr(ExtractedElementR, SourceVectR, Mask);
|
| @@ -2978,6 +3034,13 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
|
| if (Ty == IceType_v4f32)
|
| _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
|
| else
|
| + // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
|
| + // operand is a register, it must be a full r32 register like eax, and not
|
| + // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
|
| + // for the use of r16 and r8 by converting them through getBaseReg(),
|
| + // while emitIAS() validates that the original and base register encodings
|
| + // are the same. But for an "interior" register like ah, it should
|
| + // probably be copied into an r32 via movzx so that the types work out.
|
| _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
|
| _movp(Inst->getDest(), T);
|
| } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| @@ -3312,7 +3375,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
|
| } else {
|
| FirstVal = Val;
|
| }
|
| - const bool IsCttz = false;
|
| + constexpr bool IsCttz = false;
|
| lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
|
| SecondVal);
|
| return;
|
| @@ -3329,7 +3392,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
|
| } else {
|
| FirstVal = Val;
|
| }
|
| - const bool IsCttz = true;
|
| + constexpr bool IsCttz = true;
|
| lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
|
| SecondVal);
|
| return;
|
| @@ -3427,7 +3490,8 @@ template <class Machine>
|
| void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
|
| Operand *Ptr, Operand *Expected,
|
| Operand *Desired) {
|
| - if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
|
| + Type Ty = Expected->getType();
|
| + if (!Traits::Is64Bit && Ty == IceType_i64) {
|
| // Reserve the pre-colored registers first, before adding any more
|
| // infinite-weight variables from formMemoryOperand's legalization.
|
| Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
|
| @@ -3438,9 +3502,8 @@ void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
|
| _mov(T_edx, hiOperand(Expected));
|
| _mov(T_ebx, loOperand(Desired));
|
| _mov(T_ecx, hiOperand(Desired));
|
| - typename Traits::X86OperandMem *Addr =
|
| - formMemoryOperand(Ptr, Expected->getType());
|
| - const bool Locked = true;
|
| + typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
|
| + constexpr bool Locked = true;
|
| _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
|
| Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
|
| Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
|
| @@ -3448,12 +3511,26 @@ void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
|
| _mov(DestHi, T_edx);
|
| return;
|
| }
|
| - Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
|
| + int32_t Eax;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for cmpxchg");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + Eax = Traits::RegisterSet::Reg_eax;
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + break;
|
| + case IceType_i8:
|
| + Eax = Traits::RegisterSet::Reg_al;
|
| + break;
|
| + }
|
| + Variable *T_eax = makeReg(Ty, Eax);
|
| _mov(T_eax, Expected);
|
| - typename Traits::X86OperandMem *Addr =
|
| - formMemoryOperand(Ptr, Expected->getType());
|
| + typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
|
| Variable *DesiredReg = legalizeToReg(Desired);
|
| - const bool Locked = true;
|
| + constexpr bool Locked = true;
|
| _cmpxchg(Addr, T_eax, DesiredReg, Locked);
|
| _mov(DestPrev, T_eax);
|
| }
|
| @@ -3555,7 +3632,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
|
| }
|
| typename Traits::X86OperandMem *Addr =
|
| formMemoryOperand(Ptr, Dest->getType());
|
| - const bool Locked = true;
|
| + constexpr bool Locked = true;
|
| Variable *T = nullptr;
|
| _mov(T, Val);
|
| _xadd(Addr, T, Locked);
|
| @@ -3571,7 +3648,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
|
| }
|
| typename Traits::X86OperandMem *Addr =
|
| formMemoryOperand(Ptr, Dest->getType());
|
| - const bool Locked = true;
|
| + constexpr bool Locked = true;
|
| Variable *T = nullptr;
|
| _mov(T, Val);
|
| _neg(T);
|
| @@ -3679,7 +3756,7 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
|
| _mov(T_ecx, hiOperand(Val));
|
| Context.insert(Label);
|
| }
|
| - const bool Locked = true;
|
| + constexpr bool Locked = true;
|
| _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
|
| _br(Traits::Cond::Br_ne, Label);
|
| if (!IsXchg8b) {
|
| @@ -3706,7 +3783,22 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
|
| return;
|
| }
|
| typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
|
| - Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
|
| + int32_t Eax;
|
| + switch (Ty) {
|
| + default:
|
| + llvm_unreachable("Bad type for atomicRMW");
|
| + // fallthrough
|
| + case IceType_i32:
|
| + Eax = Traits::RegisterSet::Reg_eax;
|
| + break;
|
| + case IceType_i16:
|
| + Eax = Traits::RegisterSet::Reg_ax;
|
| + break;
|
| + case IceType_i8:
|
| + Eax = Traits::RegisterSet::Reg_al;
|
| + break;
|
| + }
|
| + Variable *T_eax = makeReg(Ty, Eax);
|
| _mov(T_eax, Addr);
|
| typename Traits::Insts::Label *Label =
|
| Traits::Insts::Label::create(Func, this);
|
| @@ -3716,7 +3808,7 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
|
| Variable *T = makeReg(Ty);
|
| _mov(T, T_eax);
|
| (this->*Op_Lo)(T, Val);
|
| - const bool Locked = true;
|
| + constexpr bool Locked = true;
|
| _cmpxchg(Addr, T_eax, T, Locked);
|
| _br(Traits::Cond::Br_ne, Label);
|
| // If Val is a variable, model the extended live range of Val through
|
| @@ -5094,7 +5186,7 @@ Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
|
| return Reg;
|
| } else {
|
| // SSE has no left shift operation for vectors of 8 bit integers.
|
| - const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
|
| + constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
|
| Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
|
| Variable *Reg = makeReg(Ty, RegNum);
|
| _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
|
| @@ -5128,7 +5220,7 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
|
| // TODO(wala,stichnot): lea should not
|
| // be required. The address of the stack slot is known at compile time
|
| // (although not until after addProlog()).
|
| - const Type PointerType = IceType_i32;
|
| + constexpr Type PointerType = IceType_i32;
|
| Variable *Loc = makeReg(PointerType);
|
| _lea(Loc, Slot);
|
| Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
|
| @@ -5181,7 +5273,7 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
|
| }
|
| }
|
|
|
| - if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
|
| + if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
|
| // Before doing anything with a Mem operand, we need to ensure that the
|
| // Base and Index components are in physical registers.
|
| Variable *Base = Mem->getBase();
|
| @@ -5259,7 +5351,7 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
|
| }
|
| return From;
|
| }
|
| - if (auto Var = llvm::dyn_cast<Variable>(From)) {
|
| + if (auto *Var = llvm::dyn_cast<Variable>(From)) {
|
| // Check if the variable is guaranteed a physical register. This can happen
|
| // either when the variable is pre-colored or when it is assigned infinite
|
| // weight.
|
| @@ -5514,8 +5606,8 @@ Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
|
| IceString Label;
|
| llvm::raw_string_ostream Label_stream(Label);
|
| Immediate->emitPoolLabel(Label_stream, Ctx);
|
| - const RelocOffsetT Offset = 0;
|
| - const bool SuppressMangling = true;
|
| + constexpr RelocOffsetT Offset = 0;
|
| + constexpr bool SuppressMangling = true;
|
| Constant *Symbol =
|
| Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
|
| typename Traits::X86OperandMem *MemOperand =
|
| @@ -5611,8 +5703,8 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate(
|
| llvm::raw_string_ostream Label_stream(Label);
|
| MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
|
| MemOperand->getOffset()->setShouldBePooled(true);
|
| - const RelocOffsetT SymOffset = 0;
|
| - bool SuppressMangling = true;
|
| + constexpr RelocOffsetT SymOffset = 0;
|
| + constexpr bool SuppressMangling = true;
|
| Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
|
| SuppressMangling);
|
| typename Traits::X86OperandMem *SymbolOperand =
|
|
|