| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 569 matching lines...) |
| 580 Node->getInsts().insert(I3, RMW); | 580 Node->getInsts().insert(I3, RMW); |
| 581 } | 581 } |
| 582 } | 582 } |
| 583 if (Func->isVerbose(IceV_RMW)) | 583 if (Func->isVerbose(IceV_RMW)) |
| 584 Func->getContext()->unlockStr(); | 584 Func->getContext()->unlockStr(); |
| 585 } | 585 } |
| 586 | 586 |
| 587 // Converts a ConstantInteger32 operand into its constant value, or | 587 // Converts a ConstantInteger32 operand into its constant value, or |
| 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 591 return Integer->getValue(); | 591 return Integer->getValue(); |
| 592 return Intrinsics::MemoryOrderInvalid; | 592 return Intrinsics::MemoryOrderInvalid; |
| 593 } | 593 } |
| 594 | 594 |
| 595 /// Determines whether the dest of a Load instruction can be folded into one of | 595 /// Determines whether the dest of a Load instruction can be folded into one of |
| 596 /// the src operands of a 2-operand instruction. This is true as long as the | 596 /// the src operands of a 2-operand instruction. This is true as long as the |
| 597 /// load dest matches exactly one of the binary instruction's src operands. | 597 /// load dest matches exactly one of the binary instruction's src operands. |
| 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. | 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. |
| 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 600 Operand *&Src0, Operand *&Src1) { | 600 Operand *&Src0, Operand *&Src1) { |
| (...skipping 14 matching lines...) |
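The body of `canFoldLoadIntoBinaryInst()` is elided by the diff above. A minimal sketch of the check its doc comment describes — reconstructed from the comment alone, not copied from the source — would be:

```cpp
// Sketch only: fold iff LoadDest matches exactly one of the two operands,
// then rewrite that operand to read through LoadSrc instead.
inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
                                      Operand *&Src0, Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) {
    Src0 = LoadSrc; // fold into the left operand
    return true;
  }
  if (Src0 != LoadDest && Src1 == LoadDest) {
    Src1 = LoadSrc; // fold into the right operand
    return true;
  }
  return false; // matching both (or neither) operand would be unsafe
}
```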
| 615 while (!Context.atEnd()) { | 615 while (!Context.atEnd()) { |
| 616 Variable *LoadDest = nullptr; | 616 Variable *LoadDest = nullptr; |
| 617 Operand *LoadSrc = nullptr; | 617 Operand *LoadSrc = nullptr; |
| 618 Inst *CurInst = Context.getCur(); | 618 Inst *CurInst = Context.getCur(); |
| 619 Inst *Next = Context.getNextInst(); | 619 Inst *Next = Context.getNextInst(); |
| 620 // Determine whether the current instruction is a Load instruction or | 620 // Determine whether the current instruction is a Load instruction or |
| 621 // equivalent. | 621 // equivalent. |
| 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| 623 // An InstLoad always qualifies. | 623 // An InstLoad always qualifies. |
| 624 LoadDest = Load->getDest(); | 624 LoadDest = Load->getDest(); |
| 625 const bool DoLegalize = false; | 625 constexpr bool DoLegalize = false; |
| 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
| 627 LoadDest->getType(), DoLegalize); | 627 LoadDest->getType(), DoLegalize); |
| 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
| 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory | 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory |
| 630 // ordering, and can be implemented in a single instruction (i.e., not | 630 // ordering, and can be implemented in a single instruction (i.e., not |
| 631 // i64 on x86-32). | 631 // i64 on x86-32). |
| 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
| 633 if (ID == Intrinsics::AtomicLoad && | 633 if (ID == Intrinsics::AtomicLoad && |
| 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && | 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
| 635 Intrinsics::isMemoryOrderValid( | 635 Intrinsics::isMemoryOrderValid( |
| 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
| 637 LoadDest = Intrin->getDest(); | 637 LoadDest = Intrin->getDest(); |
| 638 const bool DoLegalize = false; | 638 constexpr bool DoLegalize = false; |
| 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
| 640 DoLegalize); | 640 DoLegalize); |
| 641 } | 641 } |
| 642 } | 642 } |
| 643 // A Load instruction can be folded into the following instruction only | 643 // A Load instruction can be folded into the following instruction only |
| 644 // if the following instruction ends the Load's Dest variable's live | 644 // if the following instruction ends the Load's Dest variable's live |
| 645 // range. | 645 // range. |
| 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { |
| 647 assert(LoadSrc); | 647 assert(LoadSrc); |
| 648 Inst *NewInst = nullptr; | 648 Inst *NewInst = nullptr; |
| (...skipping 77 matching lines...) |
| 726 // considered live upon function entry. Otherwise it's possible to get | 726 // considered live upon function entry. Otherwise it's possible to get |
| 727 // liveness validation errors for saving callee-save registers. | 727 // liveness validation errors for saving callee-save registers. |
| 728 Func->addImplicitArg(Reg); | 728 Func->addImplicitArg(Reg); |
| 729 // Don't bother tracking the live range of a named physical register. | 729 // Don't bother tracking the live range of a named physical register. |
| 730 Reg->setIgnoreLiveness(); | 730 Reg->setIgnoreLiveness(); |
| 731 } | 731 } |
| 732 return Reg; | 732 return Reg; |
| 733 } | 733 } |
| 734 | 734 |
| 735 template <class Machine> | 735 template <class Machine> |
| 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const { |
| 737 return Traits::getRegName(RegNum, Ty); | 737 return Traits::getRegName(RegNum); |
| 738 } | 738 } |
| 739 | 739 |
| 740 template <class Machine> | 740 template <class Machine> |
| 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { |
| 742 if (!BuildDefs::dump()) | 742 if (!BuildDefs::dump()) |
| 743 return; | 743 return; |
| 744 Ostream &Str = Ctx->getStrEmit(); | 744 Ostream &Str = Ctx->getStrEmit(); |
| 745 if (Var->hasReg()) { | 745 if (Var->hasReg()) { |
| 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
| 747 return; | 747 return; |
| (...skipping 42 matching lines...) |
| 790 if (Var->mustHaveReg()) { | 790 if (Var->mustHaveReg()) { |
| 791 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 791 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 792 } | 792 } |
| 793 int32_t Offset = Var->getStackOffset(); | 793 int32_t Offset = Var->getStackOffset(); |
| 794 int32_t BaseRegNum = Var->getBaseRegNum(); | 794 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 795 if (Var->getBaseRegNum() == Variable::NoRegister) { | 795 if (Var->getBaseRegNum() == Variable::NoRegister) { |
| 796 BaseRegNum = getFrameOrStackReg(); | 796 BaseRegNum = getFrameOrStackReg(); |
| 797 if (!hasFramePointer()) | 797 if (!hasFramePointer()) |
| 798 Offset += getStackAdjustment(); | 798 Offset += getStackAdjustment(); |
| 799 } | 799 } |
| 800 return typename Traits::Address( | 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset); |
| 801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); | |
| 802 } | 801 } |
| 803 | 802 |
| 804 /// Helper function for addProlog(). | 803 /// Helper function for addProlog(). |
| 805 /// | 804 /// |
| 806 /// This assumes Arg is an argument passed on the stack. This sets the frame | 805 /// This assumes Arg is an argument passed on the stack. This sets the frame |
| 807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 806 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| 808 /// I64 arg that has been split into Lo and Hi components, it calls itself | 807 /// I64 arg that has been split into Lo and Hi components, it calls itself |
| 809 /// recursively on the components, taking care to handle Lo first because of the | 808 /// recursively on the components, taking care to handle Lo first because of the |
| 810 /// little-endian architecture. Lastly, this function generates an instruction | 809 /// little-endian architecture. Lastly, this function generates an instruction |
| 811 /// to copy Arg into its assigned register if applicable. | 810 /// to copy Arg into its assigned register if applicable. |
| (...skipping 228 matching lines...) |
| 1040 Src1 /= 2; | 1039 Src1 /= 2; |
| 1041 } else { | 1040 } else { |
| 1042 return false; | 1041 return false; |
| 1043 } | 1042 } |
| 1044 } | 1043 } |
| 1045 // Lea optimization only works for i16 and i32 types, not i8. | 1044 // Lea optimization only works for i16 and i32 types, not i8. |
| 1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1045 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| 1047 return false; | 1046 return false; |
| 1048 // Limit the number of lea/shl operations for a single multiply, to a | 1047 // Limit the number of lea/shl operations for a single multiply, to a |
| 1049 // somewhat arbitrary choice of 3. | 1048 // somewhat arbitrary choice of 3. |
| 1050 const uint32_t MaxOpsForOptimizedMul = 3; | 1049 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
| 1051 if (CountOps > MaxOpsForOptimizedMul) | 1050 if (CountOps > MaxOpsForOptimizedMul) |
| 1052 return false; | 1051 return false; |
| 1053 _mov(T, Src0); | 1052 _mov(T, Src0); |
| 1054 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1053 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1055 for (uint32_t i = 0; i < Count9; ++i) { | 1054 for (uint32_t i = 0; i < Count9; ++i) { |
| 1056 const uint16_t Shift = 3; // log2(9-1) | 1055 constexpr uint16_t Shift = 3; // log2(9-1) |
| 1057 _lea(T, | 1056 _lea(T, |
| 1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1057 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1059 } | 1058 } |
| 1060 for (uint32_t i = 0; i < Count5; ++i) { | 1059 for (uint32_t i = 0; i < Count5; ++i) { |
| 1061 const uint16_t Shift = 2; // log2(5-1) | 1060 constexpr uint16_t Shift = 2; // log2(5-1) |
| 1062 _lea(T, | 1061 _lea(T, |
| 1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1064 } | 1063 } |
| 1065 for (uint32_t i = 0; i < Count3; ++i) { | 1064 for (uint32_t i = 0; i < Count3; ++i) { |
| 1066 const uint16_t Shift = 1; // log2(3-1) | 1065 constexpr uint16_t Shift = 1; // log2(3-1) |
| 1067 _lea(T, | 1066 _lea(T, |
| 1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1069 } | 1068 } |
| 1070 if (Count2) { | 1069 if (Count2) { |
| 1071 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1070 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1072 } | 1071 } |
| 1073 if (Src1IsNegative) | 1072 if (Src1IsNegative) |
| 1074 _neg(T); | 1073 _neg(T); |
| 1075 _mov(Dest, T); | 1074 _mov(Dest, T); |
| 1076 return true; | 1075 return true; |
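A worked example of the decomposition `optimizeScalarMul()` performs may help (my illustration, not from the patch): a constant multiplier is peeled into factors of 9, 5, and 3 (one `lea` each, with scale 8, 4, or 2) plus a trailing power of two (one `shl`), and the whole transformation is rejected if more than three such ops would be needed.

```cpp
// Hypothetical walk-through for Src1 == 45: 45 = 9 * 5, so Count9 = 1,
// Count5 = 1, Count3 = Count2 = 0, and the emitted sequence is
//   mov T, Src0
//   lea T, [T + 8*T]   ; T *= 9  (Shift 3 == log2(9-1))
//   lea T, [T + 4*T]   ; T *= 5  (Shift 2 == log2(5-1))
//   mov Dest, T
int mulBy45(int Src0) {
  int T = Src0;
  T += 8 * T; // lea [T + 8*T]
  T += 4 * T; // lea [T + 4*T]
  return T;   // == Src0 * 45
}
```

A multiplier of 24 = 3 * 8 would similarly become one `lea` (T *= 3) plus `shl T, 3`.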
| (...skipping 131 matching lines...) |
| 1208 } | 1207 } |
| 1209 } else { | 1208 } else { |
| 1210 // NON-CONSTANT CASES. | 1209 // NON-CONSTANT CASES. |
| 1211 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1210 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1212 typename Traits::Insts::Label *Label = | 1211 typename Traits::Insts::Label *Label = |
| 1213 Traits::Insts::Label::create(Func, this); | 1212 Traits::Insts::Label::create(Func, this); |
| 1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1213 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
| 1215 // t1:ecx = c.lo & 0xff | 1214 // t1:ecx = c.lo & 0xff |
| 1216 // t2 = b.lo | 1215 // t2 = b.lo |
| 1217 // t3 = b.hi | 1216 // t3 = b.hi |
| 1218 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1217 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1218 _mov(T_1, Src1Lo); |
| 1219 _mov(T_2, Src0Lo); | 1219 _mov(T_2, Src0Lo); |
| 1220 _mov(T_3, Src0Hi); | 1220 _mov(T_3, Src0Hi); |
| 1221 switch (Op) { | 1221 switch (Op) { |
| 1222 default: | 1222 default: |
| 1223 assert(0 && "non-shift op"); | 1223 assert(0 && "non-shift op"); |
| 1224 break; | 1224 break; |
| 1225 case InstArithmetic::Shl: { | 1225 case InstArithmetic::Shl: { |
| 1226 // a=b<<c ==> | 1226 // a=b<<c ==> |
| 1227 // t3 = shld t3, t2, t1 | 1227 // t3 = shld t3, t2, t1 |
| 1228 // t2 = shl t2, t1 | 1228 // t2 = shl t2, t1 |
| (...skipping 87 matching lines...) |
| 1316 } | 1316 } |
| 1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1318 // These x86-32 helper-call-involved instructions are lowered in this | 1318 // These x86-32 helper-call-involved instructions are lowered in this |
| 1319 // separate switch. This is because loOperand() and hiOperand() may insert | 1319 // separate switch. This is because loOperand() and hiOperand() may insert |
| 1320 // redundant instructions for constant blinding and pooling. Such redundant | 1320 // redundant instructions for constant blinding and pooling. Such redundant |
| 1321 // instructions will fail liveness analysis under -Om1 setting. And, | 1321 // instructions will fail liveness analysis under -Om1 setting. And, |
| 1322 // actually these arguments do not need to be processed with loOperand() | 1322 // actually these arguments do not need to be processed with loOperand() |
| 1323 // and hiOperand() to be used. | 1323 // and hiOperand() to be used. |
| 1324 switch (Inst->getOp()) { | 1324 switch (Inst->getOp()) { |
| 1325 case InstArithmetic::Udiv: { | 1325 case InstArithmetic::Udiv: { |
| 1326 const SizeT MaxSrcs = 2; | 1326 constexpr SizeT MaxSrcs = 2; |
| 1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
| 1328 Call->addArg(Inst->getSrc(0)); | 1328 Call->addArg(Inst->getSrc(0)); |
| 1329 Call->addArg(Inst->getSrc(1)); | 1329 Call->addArg(Inst->getSrc(1)); |
| 1330 lowerCall(Call); | 1330 lowerCall(Call); |
| 1331 return; | 1331 return; |
| 1332 } | 1332 } |
| 1333 case InstArithmetic::Sdiv: { | 1333 case InstArithmetic::Sdiv: { |
| 1334 const SizeT MaxSrcs = 2; | 1334 constexpr SizeT MaxSrcs = 2; |
| 1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | 1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); |
| 1336 Call->addArg(Inst->getSrc(0)); | 1336 Call->addArg(Inst->getSrc(0)); |
| 1337 Call->addArg(Inst->getSrc(1)); | 1337 Call->addArg(Inst->getSrc(1)); |
| 1338 lowerCall(Call); | 1338 lowerCall(Call); |
| 1339 return; | 1339 return; |
| 1340 } | 1340 } |
| 1341 case InstArithmetic::Urem: { | 1341 case InstArithmetic::Urem: { |
| 1342 const SizeT MaxSrcs = 2; | 1342 constexpr SizeT MaxSrcs = 2; |
| 1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | 1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); |
| 1344 Call->addArg(Inst->getSrc(0)); | 1344 Call->addArg(Inst->getSrc(0)); |
| 1345 Call->addArg(Inst->getSrc(1)); | 1345 Call->addArg(Inst->getSrc(1)); |
| 1346 lowerCall(Call); | 1346 lowerCall(Call); |
| 1347 return; | 1347 return; |
| 1348 } | 1348 } |
| 1349 case InstArithmetic::Srem: { | 1349 case InstArithmetic::Srem: { |
| 1350 const SizeT MaxSrcs = 2; | 1350 constexpr SizeT MaxSrcs = 2; |
| 1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | 1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); |
| 1352 Call->addArg(Inst->getSrc(0)); | 1352 Call->addArg(Inst->getSrc(0)); |
| 1353 Call->addArg(Inst->getSrc(1)); | 1353 Call->addArg(Inst->getSrc(1)); |
| 1354 lowerCall(Call); | 1354 lowerCall(Call); |
| 1355 return; | 1355 return; |
| 1356 } | 1356 } |
| 1357 default: | 1357 default: |
| 1358 break; | 1358 break; |
| 1359 } | 1359 } |
| 1360 | 1360 |
| (...skipping 160 matching lines...) |
| 1521 // pmuludq T1, Src1 | 1521 // pmuludq T1, Src1 |
| 1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
| 1523 // pmuludq T2, T3 | 1523 // pmuludq T2, T3 |
| 1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | 1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
| 1525 // shufps T1, T2, {0,2,0,2} | 1525 // shufps T1, T2, {0,2,0,2} |
| 1526 // pshufd T4, T1, {0,2,1,3} | 1526 // pshufd T4, T1, {0,2,1,3} |
| 1527 // movups Dest, T4 | 1527 // movups Dest, T4 |
| 1528 | 1528 |
| 1529 // Mask that directs pshufd to create a vector with entries | 1529 // Mask that directs pshufd to create a vector with entries |
| 1530 // Src[1, 0, 3, 0] | 1530 // Src[1, 0, 3, 0] |
| 1531 const unsigned Constant1030 = 0x31; | 1531 constexpr unsigned Constant1030 = 0x31; |
| 1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | 1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); |
| 1533 // Mask that directs shufps to create a vector with entries | 1533 // Mask that directs shufps to create a vector with entries |
| 1534 // Dest[0, 2], Src[0, 2] | 1534 // Dest[0, 2], Src[0, 2] |
| 1535 const unsigned Mask0202 = 0x88; | 1535 constexpr unsigned Mask0202 = 0x88; |
| 1536 // Mask that directs pshufd to create a vector with entries | 1536 // Mask that directs pshufd to create a vector with entries |
| 1537 // Src[0, 2, 1, 3] | 1537 // Src[0, 2, 1, 3] |
| 1538 const unsigned Mask0213 = 0xd8; | 1538 constexpr unsigned Mask0213 = 0xd8; |
| 1539 Variable *T1 = makeReg(IceType_v4i32); | 1539 Variable *T1 = makeReg(IceType_v4i32); |
| 1540 Variable *T2 = makeReg(IceType_v4i32); | 1540 Variable *T2 = makeReg(IceType_v4i32); |
| 1541 Variable *T3 = makeReg(IceType_v4i32); | 1541 Variable *T3 = makeReg(IceType_v4i32); |
| 1542 Variable *T4 = makeReg(IceType_v4i32); | 1542 Variable *T4 = makeReg(IceType_v4i32); |
| 1543 _movp(T1, Src0); | 1543 _movp(T1, Src0); |
| 1544 _pshufd(T2, Src0, Mask1030); | 1544 _pshufd(T2, Src0, Mask1030); |
| 1545 _pshufd(T3, Src1, Mask1030); | 1545 _pshufd(T3, Src1, Mask1030); |
| 1546 _pmuludq(T1, Src1); | 1546 _pmuludq(T1, Src1); |
| 1547 _pmuludq(T2, T3); | 1547 _pmuludq(T2, T3); |
| 1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
| (...skipping 74 matching lines...) |
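For reference, a scalar model of the `pmuludq`/`shufps`/`pshufd` sequence in the hunk above (illustration only): `pmuludq` multiplies the even lanes of each source as unsigned 32-bit values into 64-bit products, and the two shuffles interleave the low halves back into the original lane order.

```cpp
#include <cstdint>

void mulV4I32Model(const uint32_t A[4], const uint32_t B[4],
                   uint32_t Dest[4]) {
  // T1 = pmuludq(A, B): 64-bit products of lanes 0 and 2.
  uint64_t T1_0 = (uint64_t)A[0] * B[0];
  uint64_t T1_2 = (uint64_t)A[2] * B[2];
  // T2 = pmuludq(pshufd(A, {1,0,3,0}), pshufd(B, {1,0,3,0})): lanes 1 and 3.
  uint64_t T2_0 = (uint64_t)A[1] * B[1];
  uint64_t T2_2 = (uint64_t)A[3] * B[3];
  // shufps {0,2,0,2} then pshufd {0,2,1,3}: keep the low 32 bits of each
  // product and restore the original lane order.
  Dest[0] = (uint32_t)T1_0;
  Dest[1] = (uint32_t)T2_0;
  Dest[2] = (uint32_t)T1_2;
  Dest[3] = (uint32_t)T2_2;
}
```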
| 1623 _mov(T, Src0); | 1623 _mov(T, Src0); |
| 1624 _sub(T, Src1); | 1624 _sub(T, Src1); |
| 1625 _mov(Dest, T); | 1625 _mov(Dest, T); |
| 1626 break; | 1626 break; |
| 1627 case InstArithmetic::Mul: | 1627 case InstArithmetic::Mul: |
| 1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
| 1630 return; | 1630 return; |
| 1631 } | 1631 } |
| 1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must | 1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
| 1633 // be in eax. | 1633 // be in al. |
| 1634 if (isByteSizedArithType(Dest->getType())) { | 1634 if (isByteSizedArithType(Dest->getType())) { |
| 1635 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1635 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1637 _imul(T, Src0 == Src1 ? T : Src1); | 1637 _imul(T, Src0 == Src1 ? T : Src1); |
| 1638 _mov(Dest, T); | 1638 _mov(Dest, T); |
| 1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1640 T = makeReg(Dest->getType()); | 1640 T = makeReg(Dest->getType()); |
| 1641 _imul_imm(T, Src0, ImmConst); | 1641 _imul_imm(T, Src0, ImmConst); |
| 1642 _mov(Dest, T); | 1642 _mov(Dest, T); |
| 1643 } else { | 1643 } else { |
| 1644 _mov(T, Src0); | 1644 _mov(T, Src0); |
| 1645 _imul(T, Src0 == Src1 ? T : Src1); | 1645 _imul(T, Src0 == Src1 ? T : Src1); |
| 1646 _mov(Dest, T); | 1646 _mov(Dest, T); |
| 1647 } | 1647 } |
| 1648 break; | 1648 break; |
| 1649 case InstArithmetic::Shl: | 1649 case InstArithmetic::Shl: |
| 1650 _mov(T, Src0); | 1650 _mov(T, Src0); |
| 1651 if (!llvm::isa<ConstantInteger32>(Src1)) | 1651 if (!llvm::isa<ConstantInteger32>(Src1)) { |
| 1652 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1652 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1653 _mov(Cl, Src1); |
| 1654 Src1 = Cl; |
| 1655 } |
| 1653 _shl(T, Src1); | 1656 _shl(T, Src1); |
| 1654 _mov(Dest, T); | 1657 _mov(Dest, T); |
| 1655 break; | 1658 break; |
| 1656 case InstArithmetic::Lshr: | 1659 case InstArithmetic::Lshr: |
| 1657 _mov(T, Src0); | 1660 _mov(T, Src0); |
| 1658 if (!llvm::isa<ConstantInteger32>(Src1)) | 1661 if (!llvm::isa<ConstantInteger32>(Src1)) { |
| 1659 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1662 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1663 _mov(Cl, Src1); |
| 1664 Src1 = Cl; |
| 1665 } |
| 1660 _shr(T, Src1); | 1666 _shr(T, Src1); |
| 1661 _mov(Dest, T); | 1667 _mov(Dest, T); |
| 1662 break; | 1668 break; |
| 1663 case InstArithmetic::Ashr: | 1669 case InstArithmetic::Ashr: |
| 1664 _mov(T, Src0); | 1670 _mov(T, Src0); |
| 1665 if (!llvm::isa<ConstantInteger32>(Src1)) | 1671 if (!llvm::isa<ConstantInteger32>(Src1)) { |
| 1666 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1672 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1673 _mov(Cl, Src1); |
| 1674 Src1 = Cl; |
| 1675 } |
| 1667 _sar(T, Src1); | 1676 _sar(T, Src1); |
| 1668 _mov(Dest, T); | 1677 _mov(Dest, T); |
| 1669 break; | 1678 break; |
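The three shift cases above now repeat one pattern: x86 variable-count shifts (`shl`/`shr`/`sar`) take a non-immediate count only in `%cl`, so a non-constant Src1 is first copied into an i8 temporary pinned to `cl`. A hypothetical helper (not in the patch, written as if a `TargetX86Base<Machine>` member) that would factor this out:

```cpp
// Sketch: returns Src1 unchanged when it is an immediate, otherwise a
// freshly created i8 register variable pre-colored to cl holding Src1.
Operand *legalizeShiftCount(Operand *Src1) {
  if (llvm::isa<ConstantInteger32>(Src1))
    return Src1; // immediate shift counts encode directly
  Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
  _mov(Cl, Src1);
  return Cl;
}
```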
| 1670 case InstArithmetic::Udiv: | 1679 case InstArithmetic::Udiv: |
| 1671 // div and idiv are the few arithmetic operators that do not allow | 1680 // div and idiv are the few arithmetic operators that do not allow |
| 1672 // immediates as the operand. | 1681 // immediates as the operand. |
| 1673 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1682 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1674 if (isByteSizedArithType(Dest->getType())) { | 1683 if (isByteSizedArithType(Dest->getType())) { |
| 1675 // For 8-bit unsigned division we need to zero-extend al into ah. A mov | 1684 // For 8-bit unsigned division we need to zero-extend al into ah. A mov |
| 1676 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 | 1685 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 |
| 1677 // assembler refuses to encode %ah (encoding %spl with a REX prefix | 1686 // assembler refuses to encode %ah (encoding %spl with a REX prefix |
| 1678 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah | 1687 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah |
| 1679 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and | 1688 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and |
| 1680 // d[%lh], which means the X86 target lowering (and the register | 1689 // d[%lh], which means the X86 target lowering (and the register |
| 1681 // allocator) would have to be aware of this restriction. For now, we | 1690 // allocator) would have to be aware of this restriction. For now, we |
| 1682 // simply zero %eax completely, and move the dividend into %al. | 1691 // simply zero %eax completely, and move the dividend into %al. |
| 1683 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1692 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1684 Context.insert(InstFakeDef::create(Func, T_eax)); | 1693 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1685 _xor(T_eax, T_eax); | 1694 _xor(T_eax, T_eax); |
| 1686 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1695 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1687 _div(T, Src1, T); | 1696 _div(T, Src1, T); |
| 1688 _mov(Dest, T); | 1697 _mov(Dest, T); |
| 1689 Context.insert(InstFakeUse::create(Func, T_eax)); | 1698 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1690 } else { | 1699 } else { |
| 1691 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1700 Type Ty = Dest->getType(); |
| 1692 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1701 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1693 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1702 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1703 switch (Ty) { |
| 1704 default: |
| 1705 llvm_unreachable("Bad type for udiv"); |
| 1706 // fallthrough |
| 1707 case IceType_i32: |
| 1708 break; |
| 1709 case IceType_i16: |
| 1710 Eax = Traits::RegisterSet::Reg_ax; |
| 1711 Edx = Traits::RegisterSet::Reg_dx; |
| 1712 break; |
| 1713 } |
| 1714 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1715 _mov(T, Src0, Eax); |
| 1716 _mov(T_edx, Zero, Edx); |
| 1694 _div(T, Src1, T_edx); | 1717 _div(T, Src1, T_edx); |
| 1695 _mov(Dest, T); | 1718 _mov(Dest, T); |
| 1696 } | 1719 } |
| 1697 break; | 1720 break; |
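A semantic model of the 8-bit unsigned-divide path above, for readers less familiar with `div`'s implicit operands (my sketch): the hardware divides the 16-bit value in `%ax` by the 8-bit divisor, leaving the quotient in `%al` and the remainder in `%ah`, so zeroing `%eax` up front guarantees the high half of the dividend is zero.

```cpp
#include <cstdint>

uint8_t udiv8Model(uint8_t Src0, uint8_t Src1) {
  uint16_t Ax = Src0; // xor %eax,%eax ; mov Src0,%al
  // div Src1: quotient -> %al, remainder -> %ah (Src1 != 0 assumed).
  return static_cast<uint8_t>(Ax / Src1);
}
```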
| 1698 case InstArithmetic::Sdiv: | 1721 case InstArithmetic::Sdiv: |
| 1699 // TODO(stichnot): Enable this after doing better performance and cross | 1722 // TODO(stichnot): Enable this after doing better performance and cross |
| 1700 // testing. | 1723 // testing. |
| 1701 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1724 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1702 // Optimize division by constant power of 2, but not for Om1 or O0, just | 1725 // Optimize division by constant power of 2, but not for Om1 or O0, just |
| 1703 // to keep things simple there. | 1726 // to keep things simple there. |
| (...skipping 21 matching lines...) |
| 1725 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1748 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 1726 _add(T, Src0); | 1749 _add(T, Src0); |
| 1727 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1750 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| 1728 } | 1751 } |
| 1729 _mov(Dest, T); | 1752 _mov(Dest, T); |
| 1730 return; | 1753 return; |
| 1731 } | 1754 } |
| 1732 } | 1755 } |
| 1733 } | 1756 } |
| 1734 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1757 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1735 if (isByteSizedArithType(Dest->getType())) { | 1758 switch (Type Ty = Dest->getType()) { |
| 1759 default: |
| 1760 llvm_unreachable("Bad type for sdiv"); |
| 1761 // fallthrough |
| 1762 case IceType_i32: |
| 1763 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
| 1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1764 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1737 _cbwdq(T, T); | 1765 break; |
| 1738 _idiv(T, Src1, T); | 1766 case IceType_i16: |
| 1739 _mov(Dest, T); | 1767 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1740 } else { | 1768 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1741 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1769 break; |
| 1742 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1770 case IceType_i8: |
| 1743 _cbwdq(T_edx, T); | 1771 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1744 _idiv(T, Src1, T_edx); | 1772 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1745 _mov(Dest, T); | 1773 break; |
| 1746 } | 1774 } |
| 1775 _cbwdq(T_edx, T); |
| 1776 _idiv(T, Src1, T_edx); |
| 1777 _mov(Dest, T); |
| 1747 break; | 1778 break; |
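The new Sdiv switch pins the dividend to `al`/`ax`/`eax` by width, after which `_cbwdq()` presumably emits the matching sign extension: `cbw` (al → ax), `cwd` (ax → dx:ax), or `cdq` (eax → edx:eax). A model of the i16 case (illustration only; the INT16_MIN / -1 overflow case faults in hardware and is ignored here):

```cpp
#include <cstdint>

int16_t sdiv16Model(int16_t Src0, int16_t Src1) {
  int32_t DxAx = Src0;                      // mov Src0,%ax ; cwd
  return static_cast<int16_t>(DxAx / Src1); // idiv Src1: quotient -> %ax
}
```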
| 1748 case InstArithmetic::Urem: | 1779 case InstArithmetic::Urem: |
| 1749 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1780 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1750 if (isByteSizedArithType(Dest->getType())) { | 1781 if (isByteSizedArithType(Dest->getType())) { |
| 1751 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1782 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1752 Context.insert(InstFakeDef::create(Func, T_eax)); | 1783 Context.insert(InstFakeDef::create(Func, T_eax)); |
| 1753 _xor(T_eax, T_eax); | 1784 _xor(T_eax, T_eax); |
| 1754 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1785 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1755 _div(T, Src1, T); | 1786 _div(T, Src1, T); |
| 1756 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1787 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 1757 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1788 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1758 // this ever becomes a problem we can introduce a pseudo rem instruction | 1789 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1759 // that returns the remainder in %al directly (and uses a mov for copying | 1790 // that returns the remainder in %al directly (and uses a mov for copying |
| 1760 // %ah to %al.) | 1791 // %ah to %al.) |
| 1761 static constexpr uint8_t AlSizeInBits = 8; | 1792 static constexpr uint8_t AlSizeInBits = 8; |
| 1762 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1793 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1763 _mov(Dest, T); | 1794 _mov(Dest, T); |
| 1764 Context.insert(InstFakeUse::create(Func, T_eax)); | 1795 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1765 } else { | 1796 } else { |
| 1766 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1797 Type Ty = Dest->getType(); |
| 1767 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 1798 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1799 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1800 switch (Ty) { |
| 1801 default: |
| 1802 llvm_unreachable("Bad type for urem"); |
| 1803 // fallthrough |
| 1804 case IceType_i32: |
| 1805 break; |
| 1806 case IceType_i16: |
| 1807 Eax = Traits::RegisterSet::Reg_ax; |
| 1808 Edx = Traits::RegisterSet::Reg_dx; |
| 1809 break; |
| 1810 } |
| 1811 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1812 T_edx = makeReg(Dest->getType(), Edx); |
| 1768 _mov(T_edx, Zero); | 1813 _mov(T_edx, Zero); |
| 1769 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1814 _mov(T, Src0, Eax); |
| 1770 _div(T_edx, Src1, T); | 1815 _div(T_edx, Src1, T); |
| 1771 _mov(Dest, T_edx); | 1816 _mov(Dest, T_edx); |
| 1772 } | 1817 } |
| 1773 break; | 1818 break; |
| 1774 case InstArithmetic::Srem: | 1819 case InstArithmetic::Srem: |
| 1775 // TODO(stichnot): Enable this after doing better performance and cross | 1820 // TODO(stichnot): Enable this after doing better performance and cross |
| 1776 // testing. | 1821 // testing. |
| 1777 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1822 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1778 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 1823 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
| 1779 // keep things simple there. | 1824 // keep things simple there. |
| (...skipping 26 matching lines...) |
| 1806 _add(T, Src0); | 1851 _add(T, Src0); |
| 1807 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 1808 _sub(T, Src0); | 1853 _sub(T, Src0); |
| 1809 _neg(T); | 1854 _neg(T); |
| 1810 _mov(Dest, T); | 1855 _mov(Dest, T); |
| 1811 return; | 1856 return; |
| 1812 } | 1857 } |
| 1813 } | 1858 } |
| 1814 } | 1859 } |
| 1815 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1816 if (isByteSizedArithType(Dest->getType())) { | 1861 switch (Type Ty = Dest->getType()) { |
| 1817 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1862 default: |
| 1818 // T is %al. | 1863 llvm_unreachable("Bad type for srem"); |
| 1819 _cbwdq(T, T); | 1864 // fallthrough |
| 1820 _idiv(T, Src1, T); | 1865 case IceType_i32: |
| 1821 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1866 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
| 1822 Context.insert(InstFakeDef::create(Func, T_eax)); | |
| 1823 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | |
| 1824 // mov %ah, %al because it would make x86-64 codegen more complicated. If | |
| 1825 // this ever becomes a problem we can introduce a pseudo rem instruction | |
| 1826 // that returns the remainder in %al directly (and uses a mov for copying | |
| 1827 // %ah to %al.) | |
| 1828 static constexpr uint8_t AlSizeInBits = 8; | |
| 1829 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | |
| 1830 _mov(Dest, T); | |
| 1831 Context.insert(InstFakeUse::create(Func, T_eax)); | |
| 1832 } else { | |
| 1833 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | |
| 1834 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1867 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1835 _cbwdq(T_edx, T); | 1868 _cbwdq(T_edx, T); |
| 1836 _idiv(T_edx, Src1, T); | 1869 _idiv(T_edx, Src1, T); |
| 1837 _mov(Dest, T_edx); | 1870 _mov(Dest, T_edx); |
| 1871 break; |
| 1872 case IceType_i16: |
| 1873 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1874 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1875 _cbwdq(T_edx, T); |
| 1876 _idiv(T_edx, Src1, T); |
| 1877 _mov(Dest, T_edx); |
| 1878 break; |
| 1879 case IceType_i8: |
| 1880 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1881 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). |
| 1882 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah); |
| 1883 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1884 _cbwdq(T_edx, T); |
| 1885 _idiv(T_edx, Src1, T); |
| 1886 static constexpr uint8_t AlSizeInBits = 8; |
| 1887 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits)); |
| 1888 _mov(Dest, T_edx); |
| 1889 break; |
| 1838 } | 1890 } |
| 1839 break; | 1891 break; |
| 1840 case InstArithmetic::Fadd: | 1892 case InstArithmetic::Fadd: |
| 1841 _mov(T, Src0); | 1893 _mov(T, Src0); |
| 1842 _addss(T, Src1); | 1894 _addss(T, Src1); |
| 1843 _mov(Dest, T); | 1895 _mov(Dest, T); |
| 1844 break; | 1896 break; |
| 1845 case InstArithmetic::Fsub: | 1897 case InstArithmetic::Fsub: |
| 1846 _mov(T, Src0); | 1898 _mov(T, Src0); |
| 1847 _subss(T, Src1); | 1899 _subss(T, Src1); |
| 1848 _mov(Dest, T); | 1900 _mov(Dest, T); |
| 1849 break; | 1901 break; |
| 1850 case InstArithmetic::Fmul: | 1902 case InstArithmetic::Fmul: |
| 1851 _mov(T, Src0); | 1903 _mov(T, Src0); |
| 1852 _mulss(T, Src0 == Src1 ? T : Src1); | 1904 _mulss(T, Src0 == Src1 ? T : Src1); |
| 1853 _mov(Dest, T); | 1905 _mov(Dest, T); |
| 1854 break; | 1906 break; |
| 1855 case InstArithmetic::Fdiv: | 1907 case InstArithmetic::Fdiv: |
| 1856 _mov(T, Src0); | 1908 _mov(T, Src0); |
| 1857 _divss(T, Src1); | 1909 _divss(T, Src1); |
| 1858 _mov(Dest, T); | 1910 _mov(Dest, T); |
| 1859 break; | 1911 break; |
| 1860 case InstArithmetic::Frem: { | 1912 case InstArithmetic::Frem: { |
| 1861 const SizeT MaxSrcs = 2; | 1913 constexpr SizeT MaxSrcs = 2; |
| 1862 Type Ty = Dest->getType(); | 1914 Type Ty = Dest->getType(); |
| 1863 InstCall *Call = makeHelperCall( | 1915 InstCall *Call = makeHelperCall( |
| 1864 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 1916 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
| 1865 Call->addArg(Src0); | 1917 Call->addArg(Src0); |
| 1866 Call->addArg(Src1); | 1918 Call->addArg(Src1); |
| 1867 return lowerCall(Call); | 1919 return lowerCall(Call); |
| 1868 } | 1920 } |
| 1869 } | 1921 } |
| 1870 } | 1922 } |
| 1871 | 1923 |
| (...skipping 234 matching lines...) |
| 2106 if (isVectorType(Dest->getType())) { | 2158 if (isVectorType(Dest->getType())) { |
| 2107 assert(Dest->getType() == IceType_v4i32 && | 2159 assert(Dest->getType() == IceType_v4i32 && |
| 2108 Inst->getSrc(0)->getType() == IceType_v4f32); | 2160 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2161 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2110 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2162 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2111 Src0RM = legalizeToReg(Src0RM); | 2163 Src0RM = legalizeToReg(Src0RM); |
| 2112 Variable *T = makeReg(Dest->getType()); | 2164 Variable *T = makeReg(Dest->getType()); |
| 2113 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2165 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2114 _movp(Dest, T); | 2166 _movp(Dest, T); |
| 2115 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2167 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2116 const SizeT MaxSrcs = 1; | 2168 constexpr SizeT MaxSrcs = 1; |
| 2117 Type SrcType = Inst->getSrc(0)->getType(); | 2169 Type SrcType = Inst->getSrc(0)->getType(); |
| 2118 InstCall *Call = | 2170 InstCall *Call = |
| 2119 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2171 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 2120 : H_fptosi_f64_i64, | 2172 : H_fptosi_f64_i64, |
| 2121 Dest, MaxSrcs); | 2173 Dest, MaxSrcs); |
| 2122 Call->addArg(Inst->getSrc(0)); | 2174 Call->addArg(Inst->getSrc(0)); |
| 2123 lowerCall(Call); | 2175 lowerCall(Call); |
| 2124 } else { | 2176 } else { |
| 2125 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2177 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2126 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2178 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| (...skipping 10 matching lines...) |
| 2137 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2189 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2138 if (Dest->getType() == IceType_i1) | 2190 if (Dest->getType() == IceType_i1) |
| 2139 _and(T_2, Ctx->getConstantInt1(1)); | 2191 _and(T_2, Ctx->getConstantInt1(1)); |
| 2140 _mov(Dest, T_2); | 2192 _mov(Dest, T_2); |
| 2141 } | 2193 } |
| 2142 break; | 2194 break; |
| 2143 case InstCast::Fptoui: | 2195 case InstCast::Fptoui: |
| 2144 if (isVectorType(Dest->getType())) { | 2196 if (isVectorType(Dest->getType())) { |
| 2145 assert(Dest->getType() == IceType_v4i32 && | 2197 assert(Dest->getType() == IceType_v4i32 && |
| 2146 Inst->getSrc(0)->getType() == IceType_v4f32); | 2198 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2147 const SizeT MaxSrcs = 1; | 2199 constexpr SizeT MaxSrcs = 1; |
| 2148 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2200 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
| 2149 Call->addArg(Inst->getSrc(0)); | 2201 Call->addArg(Inst->getSrc(0)); |
| 2150 lowerCall(Call); | 2202 lowerCall(Call); |
| 2151 } else if (Dest->getType() == IceType_i64 || | 2203 } else if (Dest->getType() == IceType_i64 || |
| 2152 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2204 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
| 2153 // Use a helper for both x86-32 and x86-64. | 2205 // Use a helper for both x86-32 and x86-64. |
| 2154 const SizeT MaxSrcs = 1; | 2206 constexpr SizeT MaxSrcs = 1; |
| 2155 Type DestType = Dest->getType(); | 2207 Type DestType = Dest->getType(); |
| 2156 Type SrcType = Inst->getSrc(0)->getType(); | 2208 Type SrcType = Inst->getSrc(0)->getType(); |
| 2157 IceString TargetString; | 2209 IceString TargetString; |
| 2158 if (Traits::Is64Bit) { | 2210 if (Traits::Is64Bit) { |
| 2159 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2160 : H_fptoui_f64_i64; | 2212 : H_fptoui_f64_i64; |
| 2161 } else if (isInt32Asserting32Or64(DestType)) { | 2213 } else if (isInt32Asserting32Or64(DestType)) { |
| 2162 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
| 2163 : H_fptoui_f64_i32; | 2215 : H_fptoui_f64_i32; |
| 2164 } else { | 2216 } else { |
| (...skipping 28 matching lines...) |
| 2193 assert(Dest->getType() == IceType_v4f32 && | 2245 assert(Dest->getType() == IceType_v4f32 && |
| 2194 Inst->getSrc(0)->getType() == IceType_v4i32); | 2246 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2195 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2247 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2196 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2248 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2197 Src0RM = legalizeToReg(Src0RM); | 2249 Src0RM = legalizeToReg(Src0RM); |
| 2198 Variable *T = makeReg(Dest->getType()); | 2250 Variable *T = makeReg(Dest->getType()); |
| 2199 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2251 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2200 _movp(Dest, T); | 2252 _movp(Dest, T); |
| 2201 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2253 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2202 // Use a helper for x86-32. | 2254 // Use a helper for x86-32. |
| 2203 const SizeT MaxSrcs = 1; | 2255 constexpr SizeT MaxSrcs = 1; |
| 2204 Type DestType = Dest->getType(); | 2256 Type DestType = Dest->getType(); |
| 2205 InstCall *Call = | 2257 InstCall *Call = |
| 2206 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2258 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
| 2207 : H_sitofp_i64_f64, | 2259 : H_sitofp_i64_f64, |
| 2208 Dest, MaxSrcs); | 2260 Dest, MaxSrcs); |
| 2209 // TODO: Call the correct compiler-rt helper function. | 2261 // TODO: Call the correct compiler-rt helper function. |
| 2210 Call->addArg(Inst->getSrc(0)); | 2262 Call->addArg(Inst->getSrc(0)); |
| 2211 lowerCall(Call); | 2263 lowerCall(Call); |
| 2212 return; | 2264 return; |
| 2213 } else { | 2265 } else { |
| (...skipping 14 matching lines...) |
| 2228 _movsx(T_1, Src0RM); | 2280 _movsx(T_1, Src0RM); |
| 2229 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2281 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2230 _mov(Dest, T_2); | 2282 _mov(Dest, T_2); |
| 2231 } | 2283 } |
| 2232 break; | 2284 break; |
| 2233 case InstCast::Uitofp: { | 2285 case InstCast::Uitofp: { |
| 2234 Operand *Src0 = Inst->getSrc(0); | 2286 Operand *Src0 = Inst->getSrc(0); |
| 2235 if (isVectorType(Src0->getType())) { | 2287 if (isVectorType(Src0->getType())) { |
| 2236 assert(Dest->getType() == IceType_v4f32 && | 2288 assert(Dest->getType() == IceType_v4f32 && |
| 2237 Src0->getType() == IceType_v4i32); | 2289 Src0->getType() == IceType_v4i32); |
| 2238 const SizeT MaxSrcs = 1; | 2290 constexpr SizeT MaxSrcs = 1; |
| 2239 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2291 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
| 2240 Call->addArg(Src0); | 2292 Call->addArg(Src0); |
| 2241 lowerCall(Call); | 2293 lowerCall(Call); |
| 2242 } else if (Src0->getType() == IceType_i64 || | 2294 } else if (Src0->getType() == IceType_i64 || |
| 2243 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2295 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| 2244 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 2296 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
| 2245 // x86-32. | 2297 // x86-32. |
| 2246 const SizeT MaxSrcs = 1; | 2298 constexpr SizeT MaxSrcs = 1; |
| 2247 Type DestType = Dest->getType(); | 2299 Type DestType = Dest->getType(); |
| 2248 IceString TargetString; | 2300 IceString TargetString; |
| 2249 if (isInt32Asserting32Or64(Src0->getType())) { | 2301 if (isInt32Asserting32Or64(Src0->getType())) { |
| 2250 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2302 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
| 2251 : H_uitofp_i32_f64; | 2303 : H_uitofp_i32_f64; |
| 2252 } else { | 2304 } else { |
| 2253 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2305 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
| 2254 : H_uitofp_i64_f64; | 2306 : H_uitofp_i64_f64; |
| 2255 } | 2307 } |
| 2256 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2308 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| (...skipping 195 matching lines...) |
| 2452 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2504 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2453 ConstantInteger32 *ElementIndex = | 2505 ConstantInteger32 *ElementIndex = |
| 2454 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2506 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
| 2455 // Only constant indices are allowed in PNaCl IR. | 2507 // Only constant indices are allowed in PNaCl IR. |
| 2456 assert(ElementIndex); | 2508 assert(ElementIndex); |
| 2457 | 2509 |
| 2458 unsigned Index = ElementIndex->getValue(); | 2510 unsigned Index = ElementIndex->getValue(); |
| 2459 Type Ty = SourceVectNotLegalized->getType(); | 2511 Type Ty = SourceVectNotLegalized->getType(); |
| 2460 Type ElementTy = typeElementType(Ty); | 2512 Type ElementTy = typeElementType(Ty); |
| 2461 Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 2513 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 2462 Variable *ExtractedElementR = makeReg(InVectorElementTy); | |
| 2463 | 2514 |
| 2464 // TODO(wala): Determine the best lowering sequences for each type. | 2515 // TODO(wala): Determine the best lowering sequences for each type. |
| 2465 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2516 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 2466 InstructionSet >= Traits::SSE4_1; | 2517 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32); |
| 2467 if (CanUsePextr && Ty != IceType_v4f32) { | 2518 Variable *ExtractedElementR = |
| 2468 // Use pextrb, pextrw, or pextrd. | 2519 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy); |
| 2520 if (CanUsePextr) { |
| 2521 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper |
| 2522 // bits of the destination register, so we represent this by always |
| 2523 // extracting into an i32 register. The _mov into Dest below will do |
| 2524 // truncation as necessary. |
| 2469 Constant *Mask = Ctx->getConstantInt32(Index); | 2525 Constant *Mask = Ctx->getConstantInt32(Index); |
| 2470 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); | 2526 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); |
| 2471 _pextr(ExtractedElementR, SourceVectR, Mask); | 2527 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2472 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2528 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2473 // Use pshufd and movd/movss. | 2529 // Use pshufd and movd/movss. |
| 2474 Variable *T = nullptr; | 2530 Variable *T = nullptr; |
| 2475 if (Index) { | 2531 if (Index) { |
| 2476 // The shuffle only needs to occur if the element to be extracted is not | 2532 // The shuffle only needs to occur if the element to be extracted is not |
| 2477 // at the lowest index. | 2533 // at the lowest index. |
| 2478 Constant *Mask = Ctx->getConstantInt32(Index); | 2534 Constant *Mask = Ctx->getConstantInt32(Index); |
| (...skipping 492 matching lines...) |
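The subtle point in the extractelement hunk above is that `pextrb`/`pextrw`/`pextrd` always write a full 32-bit GPR, zero-extending narrow lanes; that is why ExtractedElementR is now typed i32 whenever pextr is usable, with the final `_mov` into Dest doing any truncation. A model of the zero extension (my illustration):

```cpp
#include <cstdint>

// pextrw $Index, %xmm, %r32 writes the 16-bit lane with bits 16..31 zeroed.
uint32_t pextrwModel(const uint16_t Lanes[8], unsigned Index) {
  return static_cast<uint32_t>(Lanes[Index]); // implicit zero-extension
}
```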
| 2971 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3027 // Use insertps, pinsrb, pinsrw, or pinsrd. |
| 2972 Operand *ElementRM = | 3028 Operand *ElementRM = |
| 2973 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3029 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 2974 Operand *SourceVectRM = | 3030 Operand *SourceVectRM = |
| 2975 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3031 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 2976 Variable *T = makeReg(Ty); | 3032 Variable *T = makeReg(Ty); |
| 2977 _movp(T, SourceVectRM); | 3033 _movp(T, SourceVectRM); |
| 2978 if (Ty == IceType_v4f32) | 3034 if (Ty == IceType_v4f32) |
| 2979 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3035 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 2980 else | 3036 else |
| 3037 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source |
| 3038 // operand is a register, it must be a full r32 register like eax, and not |
| 3039 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates |
| 3040 // for the use of r16 and r8 by converting them through getBaseReg(), |
| 3041 // while emitIAS() validates that the original and base register encodings |
| 3042 // are the same. But for an "interior" register like ah, it should |
| 3043 // probably be copied into an r32 via movzx so that the types work out. |
| 2981 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3044 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
| 2982 _movp(Inst->getDest(), T); | 3045 _movp(Inst->getDest(), T); |
| 2983 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 3046 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2984 // Use shufps or movss. | 3047 // Use shufps or movss. |
| 2985 Variable *ElementR = nullptr; | 3048 Variable *ElementR = nullptr; |
| 2986 Operand *SourceVectRM = | 3049 Operand *SourceVectRM = |
| 2987 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3050 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 2988 | 3051 |
| 2989 if (InVectorElementTy == IceType_f32) { | 3052 if (InVectorElementTy == IceType_f32) { |
| 2990 // ElementR will be in an XMM register since it is floating point. | 3053 // ElementR will be in an XMM register since it is floating point. |
| (...skipping 314 matching lines...) |
| 3305 // well-defined value. | 3368 // well-defined value. |
| 3306 Operand *Val = legalize(Instr->getArg(0)); | 3369 Operand *Val = legalize(Instr->getArg(0)); |
| 3307 Operand *FirstVal; | 3370 Operand *FirstVal; |
| 3308 Operand *SecondVal = nullptr; | 3371 Operand *SecondVal = nullptr; |
| 3309 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3372 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 3310 FirstVal = loOperand(Val); | 3373 FirstVal = loOperand(Val); |
| 3311 SecondVal = hiOperand(Val); | 3374 SecondVal = hiOperand(Val); |
| 3312 } else { | 3375 } else { |
| 3313 FirstVal = Val; | 3376 FirstVal = Val; |
| 3314 } | 3377 } |
| 3315 const bool IsCttz = false; | 3378 constexpr bool IsCttz = false; |
| 3316 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3379 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 3317 SecondVal); | 3380 SecondVal); |
| 3318 return; | 3381 return; |
| 3319 } | 3382 } |
| 3320 case Intrinsics::Cttz: { | 3383 case Intrinsics::Cttz: { |
| 3321 // The "is zero undef" parameter is ignored and we always return a | 3384 // The "is zero undef" parameter is ignored and we always return a |
| 3322 // well-defined value. | 3385 // well-defined value. |
| 3323 Operand *Val = legalize(Instr->getArg(0)); | 3386 Operand *Val = legalize(Instr->getArg(0)); |
| 3324 Operand *FirstVal; | 3387 Operand *FirstVal; |
| 3325 Operand *SecondVal = nullptr; | 3388 Operand *SecondVal = nullptr; |
| 3326 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3389 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 3327 FirstVal = hiOperand(Val); | 3390 FirstVal = hiOperand(Val); |
| 3328 SecondVal = loOperand(Val); | 3391 SecondVal = loOperand(Val); |
| 3329 } else { | 3392 } else { |
| 3330 FirstVal = Val; | 3393 FirstVal = Val; |
| 3331 } | 3394 } |
| 3332 const bool IsCttz = true; | 3395 constexpr bool IsCttz = true; |
| 3333 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3396 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 3334 SecondVal); | 3397 SecondVal); |
| 3335 return; | 3398 return; |
| 3336 } | 3399 } |
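`lowerCountZeros()` itself is outside this hunk; a plausible scalar shape for the 32-bit ctlz case, using the classic `bsr` identity, is sketched below (an assumption about the lowering, not code from the patch). It also shows why the "is zero undef" argument can be ignored: the zero input is given the well-defined result 32.

```cpp
#include <cstdint>

uint32_t ctlz32Model(uint32_t X) {
  if (X == 0)
    return 32;                   // well-defined value for zero input
  uint32_t MsbIndex = 31;        // bsr: index of the highest set bit
  while (!(X & (1u << MsbIndex)))
    --MsbIndex;
  return 31 - MsbIndex;          // ctlz(x) == 31 - bsr(x) for x != 0
}
```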
| 3337 case Intrinsics::Fabs: { | 3400 case Intrinsics::Fabs: { |
| 3338 Operand *Src = legalize(Instr->getArg(0)); | 3401 Operand *Src = legalize(Instr->getArg(0)); |
| 3339 Type Ty = Src->getType(); | 3402 Type Ty = Src->getType(); |
| 3340 Variable *Dest = Instr->getDest(); | 3403 Variable *Dest = Instr->getDest(); |
| 3341 Variable *T = makeVectorOfFabsMask(Ty); | 3404 Variable *T = makeVectorOfFabsMask(Ty); |
| 3342 // The pand instruction operates on an m128 memory operand, so if Src is an | 3405 // The pand instruction operates on an m128 memory operand, so if Src is an |
| (...skipping 77 matching lines...) |
| 3420 Func->setError("Should not be lowering UnknownIntrinsic"); | 3483 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3421 return; | 3484 return; |
| 3422 } | 3485 } |
| 3423 return; | 3486 return; |
| 3424 } | 3487 } |
| 3425 | 3488 |
| 3426 template <class Machine> | 3489 template <class Machine> |
| 3427 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3490 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3428 Operand *Ptr, Operand *Expected, | 3491 Operand *Ptr, Operand *Expected, |
| 3429 Operand *Desired) { | 3492 Operand *Desired) { |
| 3430 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { | 3493 Type Ty = Expected->getType(); |
| 3494 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 3431 // Reserve the pre-colored registers first, before adding any more | 3495 // Reserve the pre-colored registers first, before adding any more |
| 3432 // infinite-weight variables from formMemoryOperand's legalization. | 3496 // infinite-weight variables from formMemoryOperand's legalization. |
| 3433 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3497 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3434 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3498 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3435 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3499 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3436 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3500 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3437 _mov(T_eax, loOperand(Expected)); | 3501 _mov(T_eax, loOperand(Expected)); |
| 3438 _mov(T_edx, hiOperand(Expected)); | 3502 _mov(T_edx, hiOperand(Expected)); |
| 3439 _mov(T_ebx, loOperand(Desired)); | 3503 _mov(T_ebx, loOperand(Desired)); |
| 3440 _mov(T_ecx, hiOperand(Desired)); | 3504 _mov(T_ecx, hiOperand(Desired)); |
| 3441 typename Traits::X86OperandMem *Addr = | 3505 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3442 formMemoryOperand(Ptr, Expected->getType()); | 3506 constexpr bool Locked = true; |
| 3443 const bool Locked = true; | |
| 3444 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3507 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3445 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3508 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3446 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3509 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3447 _mov(DestLo, T_eax); | 3510 _mov(DestLo, T_eax); |
| 3448 _mov(DestHi, T_edx); | 3511 _mov(DestHi, T_edx); |
| 3449 return; | 3512 return; |
| 3450 } | 3513 } |
| 3451 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); | 3514 int32_t Eax; |
| 3515 switch (Ty) { |
| 3516 default: |
| 3517 llvm_unreachable("Bad type for cmpxchg"); |
| 3518 // fallthrough |
| 3519 case IceType_i32: |
| 3520 Eax = Traits::RegisterSet::Reg_eax; |
| 3521 break; |
| 3522 case IceType_i16: |
| 3523 Eax = Traits::RegisterSet::Reg_ax; |
| 3524 break; |
| 3525 case IceType_i8: |
| 3526 Eax = Traits::RegisterSet::Reg_al; |
| 3527 break; |
| 3528 } |
| 3529 Variable *T_eax = makeReg(Ty, Eax); |
| 3452 _mov(T_eax, Expected); | 3530 _mov(T_eax, Expected); |
| 3453 typename Traits::X86OperandMem *Addr = | 3531 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3454 formMemoryOperand(Ptr, Expected->getType()); | |
| 3455 Variable *DesiredReg = legalizeToReg(Desired); | 3532 Variable *DesiredReg = legalizeToReg(Desired); |
| 3456 const bool Locked = true; | 3533 constexpr bool Locked = true; |
| 3457 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3534 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3458 _mov(DestPrev, T_eax); | 3535 _mov(DestPrev, T_eax); |
| 3459 } | 3536 } |
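The lowered sequence leaves the previous memory value in eax (edx:eax for the 8-byte form), and that value becomes DestPrev whether or not the exchange happened. A sketch of the same contract in std::atomic terms, with an illustrative helper name:

    // Sketch: compare_exchange_strong rewrites Expected with the observed
    // value on failure; on success the observed value already equals
    // Expected. Either way the return is the previous memory contents,
    // which is what DestPrev receives.
    #include <atomic>
    #include <cstdint>
    static uint32_t cmpxchgPrev(std::atomic<uint32_t> *Ptr,
                                uint32_t Expected, uint32_t Desired) {
      Ptr->compare_exchange_strong(Expected, Desired);
      return Expected;
    }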
| 3460 | 3537 |
| 3461 template <class Machine> | 3538 template <class Machine> |
| 3462 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3539 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| 3463 Operand *PtrToMem, | 3540 Operand *PtrToMem, |
| 3464 Operand *Expected, | 3541 Operand *Expected, |
| 3465 Operand *Desired) { | 3542 Operand *Desired) { |
| 3466 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3543 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| (...skipping 81 matching lines...) |
| 3548 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3625 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3549 // All the fall-through paths must set this to true; it is used only | 3626 // All the fall-through paths must set this to true; it is used only |
| 3550 // for asserting. | 3627 // for asserting. |
| 3551 NeedsCmpxchg = true; | 3628 NeedsCmpxchg = true; |
| 3552 Op_Lo = &TargetX86Base<Machine>::_add; | 3629 Op_Lo = &TargetX86Base<Machine>::_add; |
| 3553 Op_Hi = &TargetX86Base<Machine>::_adc; | 3630 Op_Hi = &TargetX86Base<Machine>::_adc; |
| 3554 break; | 3631 break; |
| 3555 } | 3632 } |
| 3556 typename Traits::X86OperandMem *Addr = | 3633 typename Traits::X86OperandMem *Addr = |
| 3557 formMemoryOperand(Ptr, Dest->getType()); | 3634 formMemoryOperand(Ptr, Dest->getType()); |
| 3558 const bool Locked = true; | 3635 constexpr bool Locked = true; |
| 3559 Variable *T = nullptr; | 3636 Variable *T = nullptr; |
| 3560 _mov(T, Val); | 3637 _mov(T, Val); |
| 3561 _xadd(Addr, T, Locked); | 3638 _xadd(Addr, T, Locked); |
| 3562 _mov(Dest, T); | 3639 _mov(Dest, T); |
| 3563 return; | 3640 return; |
| 3564 } | 3641 } |
| 3565 case Intrinsics::AtomicSub: { | 3642 case Intrinsics::AtomicSub: { |
| 3566 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3643 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3567 NeedsCmpxchg = true; | 3644 NeedsCmpxchg = true; |
| 3568 Op_Lo = &TargetX86Base<Machine>::_sub; | 3645 Op_Lo = &TargetX86Base<Machine>::_sub; |
| 3569 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3646 Op_Hi = &TargetX86Base<Machine>::_sbb; |
| 3570 break; | 3647 break; |
| 3571 } | 3648 } |
| 3572 typename Traits::X86OperandMem *Addr = | 3649 typename Traits::X86OperandMem *Addr = |
| 3573 formMemoryOperand(Ptr, Dest->getType()); | 3650 formMemoryOperand(Ptr, Dest->getType()); |
| 3574 const bool Locked = true; | 3651 constexpr bool Locked = true; |
| 3575 Variable *T = nullptr; | 3652 Variable *T = nullptr; |
| 3576 _mov(T, Val); | 3653 _mov(T, Val); |
| 3577 _neg(T); | 3654 _neg(T); |
| 3578 _xadd(Addr, T, Locked); | 3655 _xadd(Addr, T, Locked); |
| 3579 _mov(Dest, T); | 3656 _mov(Dest, T); |
| 3580 return; | 3657 return; |
| 3581 } | 3658 } |
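The AtomicSub fast path rides on the identity fetch_sub(v) == fetch_add(-v), since x86 has lock xadd but no locked subtract-and-fetch-old. A sketch, helper name illustrative:

    // Sketch: atomic subtract as negate plus lock xadd, matching the
    // _neg/_xadd pair above; the return is the previous memory value.
    #include <atomic>
    #include <cstdint>
    static int32_t atomicSubViaXadd(std::atomic<int32_t> *Addr, int32_t Val) {
      return Addr->fetch_add(-Val);
    }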
| 3582 case Intrinsics::AtomicOr: | 3659 case Intrinsics::AtomicOr: |
| 3583 // TODO(jvoung): If Dest is null or dead, then some of these | 3660 // TODO(jvoung): If Dest is null or dead, then some of these |
| 3584 // operations do not need an "exchange", but just a locked op. | 3661 // operations do not need an "exchange", but just a locked op. |
| (...skipping 87 matching lines...) |
| 3672 _mov(T_ecx, T_edx); | 3749 _mov(T_ecx, T_edx); |
| 3673 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3750 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
| 3674 } else { | 3751 } else { |
| 3675 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3752 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
| 3676 // It just needs the Val loaded into ebx and ecx. | 3753 // It just needs the Val loaded into ebx and ecx. |
| 3677 // That can also be done before the loop. | 3754 // That can also be done before the loop. |
| 3678 _mov(T_ebx, loOperand(Val)); | 3755 _mov(T_ebx, loOperand(Val)); |
| 3679 _mov(T_ecx, hiOperand(Val)); | 3756 _mov(T_ecx, hiOperand(Val)); |
| 3680 Context.insert(Label); | 3757 Context.insert(Label); |
| 3681 } | 3758 } |
| 3682 const bool Locked = true; | 3759 constexpr bool Locked = true; |
| 3683 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3760 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3684 _br(Traits::Cond::Br_ne, Label); | 3761 _br(Traits::Cond::Br_ne, Label); |
| 3685 if (!IsXchg8b) { | 3762 if (!IsXchg8b) { |
| 3686 // If Val is a variable, model the extended live range of Val through | 3763 // If Val is a variable, model the extended live range of Val through |
| 3687 // the end of the loop, since it will be re-used by the loop. | 3764 // the end of the loop, since it will be re-used by the loop. |
| 3688 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3765 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3689 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3766 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
| 3690 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3767 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
| 3691 Context.insert(InstFakeUse::create(Func, ValLo)); | 3768 Context.insert(InstFakeUse::create(Func, ValLo)); |
| 3692 Context.insert(InstFakeUse::create(Func, ValHi)); | 3769 Context.insert(InstFakeUse::create(Func, ValHi)); |
| 3693 } | 3770 } |
| 3694 } else { | 3771 } else { |
| 3695 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3772 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
| 3696 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3773 Context.insert(InstFakeUse::create(Func, T_ebx)); |
| 3697 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3774 Context.insert(InstFakeUse::create(Func, T_ecx)); |
| 3698 } | 3775 } |
| 3699 // The address base (if any) is also reused in the loop. | 3776 // The address base (if any) is also reused in the loop. |
| 3700 if (Variable *Base = Addr->getBase()) | 3777 if (Variable *Base = Addr->getBase()) |
| 3701 Context.insert(InstFakeUse::create(Func, Base)); | 3778 Context.insert(InstFakeUse::create(Func, Base)); |
| 3702 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3779 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3780 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3704 _mov(DestLo, T_eax); | 3781 _mov(DestLo, T_eax); |
| 3705 _mov(DestHi, T_edx); | 3782 _mov(DestHi, T_edx); |
| 3706 return; | 3783 return; |
| 3707 } | 3784 } |
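In std::atomic terms, the loop assembled above for 64-bit RMW on 32-bit x86 has the following shape; Op is an illustrative stand-in for the Op_Lo/Op_Hi pair.

    // Sketch: the cmpxchg8b retry loop. Old tracks edx:eax; the candidate
    // new value (computed into ecx:ebx in the lowering) is retried until
    // no other thread has intervened.
    #include <atomic>
    #include <cstdint>
    static uint64_t rmw64(std::atomic<uint64_t> *Addr, uint64_t Val,
                          uint64_t (*Op)(uint64_t, uint64_t)) {
      uint64_t Old = Addr->load();
      while (!Addr->compare_exchange_weak(Old, Op(Old, Val))) {
        // On failure, Old is refreshed with the current memory value.
      }
      return Old; // previous value, delivered in edx:eax by the lowering
    }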
| 3708 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3785 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3709 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); | 3786 int32_t Eax; |
| 3787 switch (Ty) { |
| 3788 default: |
| 3789 llvm_unreachable("Bad type for atomicRMW"); |
| 3790 // fallthrough |
| 3791 case IceType_i32: |
| 3792 Eax = Traits::RegisterSet::Reg_eax; |
| 3793 break; |
| 3794 case IceType_i16: |
| 3795 Eax = Traits::RegisterSet::Reg_ax; |
| 3796 break; |
| 3797 case IceType_i8: |
| 3798 Eax = Traits::RegisterSet::Reg_al; |
| 3799 break; |
| 3800 } |
| 3801 Variable *T_eax = makeReg(Ty, Eax); |
| 3710 _mov(T_eax, Addr); | 3802 _mov(T_eax, Addr); |
| 3711 typename Traits::Insts::Label *Label = | 3803 typename Traits::Insts::Label *Label = |
| 3712 Traits::Insts::Label::create(Func, this); | 3804 Traits::Insts::Label::create(Func, this); |
| 3713 Context.insert(Label); | 3805 Context.insert(Label); |
| 3714 // We want to pick a different register for T than Eax, so don't use | 3806 // We want to pick a different register for T than Eax, so don't use |
| 3715 // _mov(T == nullptr, T_eax). | 3807 // _mov(T == nullptr, T_eax). |
| 3716 Variable *T = makeReg(Ty); | 3808 Variable *T = makeReg(Ty); |
| 3717 _mov(T, T_eax); | 3809 _mov(T, T_eax); |
| 3718 (this->*Op_Lo)(T, Val); | 3810 (this->*Op_Lo)(T, Val); |
| 3719 const bool Locked = true; | 3811 constexpr bool Locked = true; |
| 3720 _cmpxchg(Addr, T_eax, T, Locked); | 3812 _cmpxchg(Addr, T_eax, T, Locked); |
| 3721 _br(Traits::Cond::Br_ne, Label); | 3813 _br(Traits::Cond::Br_ne, Label); |
| 3722 // If Val is a variable, model the extended live range of Val through | 3814 // If Val is a variable, model the extended live range of Val through |
| 3723 // the end of the loop, since it will be re-used by the loop. | 3815 // the end of the loop, since it will be re-used by the loop. |
| 3724 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3816 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3725 Context.insert(InstFakeUse::create(Func, ValVar)); | 3817 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 3726 } | 3818 } |
| 3727 // The address base (if any) is also reused in the loop. | 3819 // The address base (if any) is also reused in the loop. |
| 3728 if (Variable *Base = Addr->getBase()) | 3820 if (Variable *Base = Addr->getBase()) |
| 3729 Context.insert(InstFakeUse::create(Func, Base)); | 3821 Context.insert(InstFakeUse::create(Func, Base)); |
| (...skipping 1357 matching lines...) |
| 5087 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 5179 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
| 5088 Ty == IceType_v16i8); | 5180 Ty == IceType_v16i8); |
| 5089 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 5181 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
| 5090 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 5182 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
| 5091 SizeT Shift = | 5183 SizeT Shift = |
| 5092 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 5184 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
| 5093 _psll(Reg, Ctx->getConstantInt8(Shift)); | 5185 _psll(Reg, Ctx->getConstantInt8(Shift)); |
| 5094 return Reg; | 5186 return Reg; |
| 5095 } else { | 5187 } else { |
| 5096 // SSE has no left shift operation for vectors of 8-bit integers. | 5188 // SSE has no left shift operation for vectors of 8-bit integers. |
| 5097 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 5189 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 5098 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 5190 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 5099 Variable *Reg = makeReg(Ty, RegNum); | 5191 Variable *Reg = makeReg(Ty, RegNum); |
| 5100 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 5192 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 5101 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 5193 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 5102 return Reg; | 5194 return Reg; |
| 5103 } | 5195 } |
| 5104 } | 5196 } |
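A sketch of the per-lane constants this routine materializes: shifting a vector of all-ones left by (laneBits - 1) leaves only the sign bit set in each lane.

    // Sketch: the resulting sign-bit masks.
    #include <cstdint>
    static const uint32_t SignMask32 = 0xFFFFFFFFu << 31;       // 0x80000000
    static const uint16_t SignMask16 = uint16_t(0xFFFFu << 15); // 0x8000
    // 8-bit lanes: SSE has no per-byte shift, so the else branch splats
    // 0x80808080 with movd + pshufd instead of shifting.
    static const uint32_t SignMask8x4 = 0x80808080u;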
| 5105 | 5197 |
| 5106 /// Construct a mask in a register that can be and'ed with a floating-point | 5198 /// Construct a mask in a register that can be and'ed with a floating-point |
| 5107 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 5199 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
| (...skipping 13 matching lines...) |
| 5121 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5213 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
| 5122 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 5214 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 5123 uint32_t Offset) { | 5215 uint32_t Offset) { |
| 5124 // Ensure that Slot is a stack slot. | 5216 // Ensure that Slot is a stack slot. |
| 5125 assert(Slot->mustNotHaveReg()); | 5217 assert(Slot->mustNotHaveReg()); |
| 5126 assert(Slot->getRegNum() == Variable::NoRegister); | 5218 assert(Slot->getRegNum() == Variable::NoRegister); |
| 5127 // Compute the location of Slot in memory. | 5219 // Compute the location of Slot in memory. |
| 5128 // TODO(wala,stichnot): lea should not | 5220 // TODO(wala,stichnot): lea should not |
| 5129 // be required. The address of the stack slot is known at compile time | 5221 // be required. The address of the stack slot is known at compile time |
| 5130 // (although not until after addProlog()). | 5222 // (although not until after addProlog()). |
| 5131 const Type PointerType = IceType_i32; | 5223 constexpr Type PointerType = IceType_i32; |
| 5132 Variable *Loc = makeReg(PointerType); | 5224 Variable *Loc = makeReg(PointerType); |
| 5133 _lea(Loc, Slot); | 5225 _lea(Loc, Slot); |
| 5134 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5226 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5135 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5227 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
| 5136 } | 5228 } |
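What the emitted lea plus constant displacement computes, in C terms; SlotAddr stands in for the lea result and is an assumption of the sketch, not a name from the source.

    // Sketch: materialize the slot's address, then read Ty bytes at a
    // constant displacement from it -- the [Loc + Offset] operand above.
    #include <cstdint>
    #include <cstring>
    static int32_t loadFromStackSlot(const char *SlotAddr, uint32_t Offset) {
      int32_t Value;
      std::memcpy(&Value, SlotAddr + Offset, sizeof(Value));
      return Value;
    }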
| 5137 | 5229 |
| 5138 /// Helper for legalize() to emit the right code to lower an operand to a | 5230 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5139 /// register of the appropriate type. | 5231 /// register of the appropriate type. |
| 5140 template <class Machine> | 5232 template <class Machine> |
| 5141 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5233 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| (...skipping 32 matching lines...) |
| 5174 if (Subst->mustHaveReg() && !Subst->hasReg()) { | 5266 if (Subst->mustHaveReg() && !Subst->hasReg()) { |
| 5175 // At this point we know the substitution will have a register. | 5267 // At this point we know the substitution will have a register. |
| 5176 if (From->getType() == Subst->getType()) { | 5268 if (From->getType() == Subst->getType()) { |
| 5177 // At this point we know the substitution's register is compatible. | 5269 // At this point we know the substitution's register is compatible. |
| 5178 return Subst; | 5270 return Subst; |
| 5179 } | 5271 } |
| 5180 } | 5272 } |
| 5181 } | 5273 } |
| 5182 } | 5274 } |
| 5183 | 5275 |
| 5184 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5276 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
| 5185 // Before doing anything with a Mem operand, we need to ensure that the | 5277 // Before doing anything with a Mem operand, we need to ensure that the |
| 5186 // Base and Index components are in physical registers. | 5278 // Base and Index components are in physical registers. |
| 5187 Variable *Base = Mem->getBase(); | 5279 Variable *Base = Mem->getBase(); |
| 5188 Variable *Index = Mem->getIndex(); | 5280 Variable *Index = Mem->getIndex(); |
| 5189 Variable *RegBase = nullptr; | 5281 Variable *RegBase = nullptr; |
| 5190 Variable *RegIndex = nullptr; | 5282 Variable *RegIndex = nullptr; |
| 5191 if (Base) { | 5283 if (Base) { |
| 5192 RegBase = legalizeToReg(Base); | 5284 RegBase = legalizeToReg(Base); |
| 5193 } | 5285 } |
| 5194 if (Index) { | 5286 if (Index) { |
| (...skipping 57 matching lines...) |
| 5252 // Immediate specifically not allowed | 5344 // Immediate specifically not allowed |
| 5253 NeedsReg = true; | 5345 NeedsReg = true; |
| 5254 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 5346 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
| 5255 // On x86, FP constants are lowered to mem operands. | 5347 // On x86, FP constants are lowered to mem operands. |
| 5256 NeedsReg = true; | 5348 NeedsReg = true; |
| 5257 if (NeedsReg) { | 5349 if (NeedsReg) { |
| 5258 From = copyToReg(From, RegNum); | 5350 From = copyToReg(From, RegNum); |
| 5259 } | 5351 } |
| 5260 return From; | 5352 return From; |
| 5261 } | 5353 } |
| 5262 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 5354 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
| 5263 // Check if the variable is guaranteed a physical register. This can happen | 5355 // Check if the variable is guaranteed a physical register. This can happen |
| 5264 // either when the variable is pre-colored or when it is assigned infinite | 5356 // either when the variable is pre-colored or when it is assigned infinite |
| 5265 // weight. | 5357 // weight. |
| 5266 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5358 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| 5267 // We need a new physical register for the operand if: | 5359 // We need a new physical register for the operand if: |
| 5268 // Mem is not allowed and Var isn't guaranteed a physical | 5360 // Mem is not allowed and Var isn't guaranteed a physical |
| 5269 // register, or | 5361 // register, or |
| 5270 // RegNum is required and Var->getRegNum() doesn't match. | 5362 // RegNum is required and Var->getRegNum() doesn't match. |
| 5271 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5363 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 5272 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5364 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| (...skipping 234 matching lines...) |
| 5507 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 5599 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
| 5508 Immediate->setShouldBePooled(true); | 5600 Immediate->setShouldBePooled(true); |
| 5509 // If we have already assigned a physical register, we must have come | 5601 // If we have already assigned a physical register, we must have come |
| 5510 // from advancedPhiLowering()=>lowerAssign(). In this case we should | 5602 // from advancedPhiLowering()=>lowerAssign(). In this case we should |
| 5511 // reuse the assigned register as this assignment is the start of its | 5603 // reuse the assigned register as this assignment is the start of its |
| 5512 // use-def chain. So we add the RegNum argument here. | 5604 // use-def chain. So we add the RegNum argument here. |
| 5513 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 5605 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
| 5514 IceString Label; | 5606 IceString Label; |
| 5515 llvm::raw_string_ostream Label_stream(Label); | 5607 llvm::raw_string_ostream Label_stream(Label); |
| 5516 Immediate->emitPoolLabel(Label_stream, Ctx); | 5608 Immediate->emitPoolLabel(Label_stream, Ctx); |
| 5517 const RelocOffsetT Offset = 0; | 5609 constexpr RelocOffsetT Offset = 0; |
| 5518 const bool SuppressMangling = true; | 5610 constexpr bool SuppressMangling = true; |
| 5519 Constant *Symbol = | 5611 Constant *Symbol = |
| 5520 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 5612 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
| 5521 typename Traits::X86OperandMem *MemOperand = | 5613 typename Traits::X86OperandMem *MemOperand = |
| 5522 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, | 5614 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, |
| 5523 Symbol); | 5615 Symbol); |
| 5524 _mov(Reg, MemOperand); | 5616 _mov(Reg, MemOperand); |
| 5525 return Reg; | 5617 return Reg; |
| 5526 } | 5618 } |
| 5527 assert("Unsupported -randomize-pool-immediates option" && false); | 5619 assert("Unsupported -randomize-pool-immediates option" && false); |
| 5528 } | 5620 } |
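Pooling trades an inline immediate for a load from a labeled data-section slot, so the constant value never appears in the instruction stream. A schematic sketch; PooledConstant stands in for the emitted pool entry and is not a name from the source.

    // Sketch: the immediate is materialized by address rather than by
    // value; conceptually a "mov reg, [label]" replaces "mov reg, imm".
    #include <cstdint>
    static const uint32_t PooledConstant = 0x12345678u; // pool slot contents
    static uint32_t materializeImmediate() {
      return PooledConstant; // load through the pool label
    }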
| (...skipping 75 matching lines...) |
| 5604 // phi lowering, we should not ask for new physical registers in | 5696 // phi lowering, we should not ask for new physical registers in |
| 5605 // general. However, if we do encounter a memory operand during phi | 5697 // general. However, if we do encounter a memory operand during phi |
| 5606 // lowering, we should not blind or pool its immediates for now. | 5698 // lowering, we should not blind or pool its immediates for now. |
| 5607 if (RegNum != Variable::NoRegister) | 5699 if (RegNum != Variable::NoRegister) |
| 5608 return MemOperand; | 5700 return MemOperand; |
| 5609 Variable *RegTemp = makeReg(IceType_i32); | 5701 Variable *RegTemp = makeReg(IceType_i32); |
| 5610 IceString Label; | 5702 IceString Label; |
| 5611 llvm::raw_string_ostream Label_stream(Label); | 5703 llvm::raw_string_ostream Label_stream(Label); |
| 5612 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 5704 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); |
| 5613 MemOperand->getOffset()->setShouldBePooled(true); | 5705 MemOperand->getOffset()->setShouldBePooled(true); |
| 5614 const RelocOffsetT SymOffset = 0; | 5706 constexpr RelocOffsetT SymOffset = 0; |
| 5615 bool SuppressMangling = true; | 5707 constexpr bool SuppressMangling = true; |
| 5616 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 5708 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
| 5617 SuppressMangling); | 5709 SuppressMangling); |
| 5618 typename Traits::X86OperandMem *SymbolOperand = | 5710 typename Traits::X86OperandMem *SymbolOperand = |
| 5619 Traits::X86OperandMem::create( | 5711 Traits::X86OperandMem::create( |
| 5620 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | 5712 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); |
| 5621 _mov(RegTemp, SymbolOperand); | 5713 _mov(RegTemp, SymbolOperand); |
| 5622 // If we have a base variable here, we should add the lea instruction | 5714 // If we have a base variable here, we should add the lea instruction |
| 5623 // to add the value of the base variable to RegTemp. If there is no | 5715 // to add the value of the base variable to RegTemp. If there is no |
| 5624 // base variable, we won't need this lea instruction. | 5716 // base variable, we won't need this lea instruction. |
| 5625 if (MemOperand->getBase()) { | 5717 if (MemOperand->getBase()) { |
| (...skipping 15 matching lines...) |
| 5641 } | 5733 } |
| 5642 // If the offset is not eligible for blinding or pooling, return the | 5734 // If the offset is not eligible for blinding or pooling, return the |
| 5643 // original memory operand. | 5735 // original memory operand. |
| 5644 return MemOperand; | 5736 return MemOperand; |
| 5645 } | 5737 } |
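The same pooling idea applied to a memory operand's offset: the pooled offset is loaded into RegTemp, the base (if any) is folded in with lea, and the rewritten operand addresses through RegTemp. A sketch under the assumption that the remaining index/scale components carry over unchanged; all names are illustrative.

    // Sketch: the effective address after the offset is pooled.
    #include <cstdint>
    static uintptr_t rewrittenAddress(uintptr_t PooledOffset, // mov from pool
                                      uintptr_t Base,         // 0 if absent
                                      uintptr_t Index, uint32_t Shift) {
      uintptr_t RegTemp = PooledOffset;  // mov RegTemp, [pool label]
      RegTemp += Base;                   // lea RegTemp, [RegTemp + Base]
      return RegTemp + (Index << Shift); // assumed: index/scale carry over
    }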
| 5646 | 5738 |
| 5647 } // end of namespace X86Internal | 5739 } // end of namespace X86Internal |
| 5648 } // end of namespace Ice | 5740 } // end of namespace Ice |
| 5649 | 5741 |
| 5650 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5742 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |