OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
580 Node->getInsts().insert(I3, RMW); | 580 Node->getInsts().insert(I3, RMW); |
581 } | 581 } |
582 } | 582 } |
583 if (Func->isVerbose(IceV_RMW)) | 583 if (Func->isVerbose(IceV_RMW)) |
584 Func->getContext()->unlockStr(); | 584 Func->getContext()->unlockStr(); |
585 } | 585 } |
586 | 586 |
587 // Converts a ConstantInteger32 operand into its constant value, or | 587 // Converts a ConstantInteger32 operand into its constant value, or |
588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
591 return Integer->getValue(); | 591 return Integer->getValue(); |
592 return Intrinsics::MemoryOrderInvalid; | 592 return Intrinsics::MemoryOrderInvalid; |
593 } | 593 } |
594 | 594 |
595 /// Determines whether the dest of a Load instruction can be folded into one of | 595 /// Determines whether the dest of a Load instruction can be folded into one of |
596 /// the src operands of a 2-operand instruction. This is true as long as the | 596 /// the src operands of a 2-operand instruction. This is true as long as the |
597 /// load dest matches exactly one of the binary instruction's src operands. | 597 /// load dest matches exactly one of the binary instruction's src operands. |
598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. | 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. |
599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
600 Operand *&Src0, Operand *&Src1) { | 600 Operand *&Src0, Operand *&Src1) { |
(...skipping 14 matching lines...) Expand all Loading... |
615 while (!Context.atEnd()) { | 615 while (!Context.atEnd()) { |
616 Variable *LoadDest = nullptr; | 616 Variable *LoadDest = nullptr; |
617 Operand *LoadSrc = nullptr; | 617 Operand *LoadSrc = nullptr; |
618 Inst *CurInst = Context.getCur(); | 618 Inst *CurInst = Context.getCur(); |
619 Inst *Next = Context.getNextInst(); | 619 Inst *Next = Context.getNextInst(); |
620 // Determine whether the current instruction is a Load instruction or | 620 // Determine whether the current instruction is a Load instruction or |
621 // equivalent. | 621 // equivalent. |
622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
623 // An InstLoad always qualifies. | 623 // An InstLoad always qualifies. |
624 LoadDest = Load->getDest(); | 624 LoadDest = Load->getDest(); |
625 const bool DoLegalize = false; | 625 constexpr bool DoLegalize = false; |
626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
627 LoadDest->getType(), DoLegalize); | 627 LoadDest->getType(), DoLegalize); |
628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory | 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory |
630 // ordering, and can be implemented in a single instruction (i.e., not | 630 // ordering, and can be implemented in a single instruction (i.e., not |
631 // i64 on x86-32). | 631 // i64 on x86-32). |
632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
633 if (ID == Intrinsics::AtomicLoad && | 633 if (ID == Intrinsics::AtomicLoad && |
634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && | 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
635 Intrinsics::isMemoryOrderValid( | 635 Intrinsics::isMemoryOrderValid( |
636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
637 LoadDest = Intrin->getDest(); | 637 LoadDest = Intrin->getDest(); |
638 const bool DoLegalize = false; | 638 constexpr bool DoLegalize = false; |
639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
640 DoLegalize); | 640 DoLegalize); |
641 } | 641 } |
642 } | 642 } |
643 // A Load instruction can be folded into the following instruction only | 643 // A Load instruction can be folded into the following instruction only |
644 // if the following instruction ends the Load's Dest variable's live | 644 // if the following instruction ends the Load's Dest variable's live |
645 // range. | 645 // range. |
646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { |
647 assert(LoadSrc); | 647 assert(LoadSrc); |
648 Inst *NewInst = nullptr; | 648 Inst *NewInst = nullptr; |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
726 // considered live upon function entry. Otherwise it's possible to get | 726 // considered live upon function entry. Otherwise it's possible to get |
727 // liveness validation errors for saving callee-save registers. | 727 // liveness validation errors for saving callee-save registers. |
728 Func->addImplicitArg(Reg); | 728 Func->addImplicitArg(Reg); |
729 // Don't bother tracking the live range of a named physical register. | 729 // Don't bother tracking the live range of a named physical register. |
730 Reg->setIgnoreLiveness(); | 730 Reg->setIgnoreLiveness(); |
731 } | 731 } |
732 return Reg; | 732 return Reg; |
733 } | 733 } |
734 | 734 |
735 template <class Machine> | 735 template <class Machine> |
736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const { |
737 return Traits::getRegName(RegNum, Ty); | 737 return Traits::getRegName(RegNum); |
738 } | 738 } |
739 | 739 |
740 template <class Machine> | 740 template <class Machine> |
741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { |
742 if (!BuildDefs::dump()) | 742 if (!BuildDefs::dump()) |
743 return; | 743 return; |
744 Ostream &Str = Ctx->getStrEmit(); | 744 Ostream &Str = Ctx->getStrEmit(); |
745 if (Var->hasReg()) { | 745 if (Var->hasReg()) { |
746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
747 return; | 747 return; |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
790 if (Var->mustHaveReg()) { | 790 if (Var->mustHaveReg()) { |
791 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 791 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
792 } | 792 } |
793 int32_t Offset = Var->getStackOffset(); | 793 int32_t Offset = Var->getStackOffset(); |
794 int32_t BaseRegNum = Var->getBaseRegNum(); | 794 int32_t BaseRegNum = Var->getBaseRegNum(); |
795 if (Var->getBaseRegNum() == Variable::NoRegister) { | 795 if (Var->getBaseRegNum() == Variable::NoRegister) { |
796 BaseRegNum = getFrameOrStackReg(); | 796 BaseRegNum = getFrameOrStackReg(); |
797 if (!hasFramePointer()) | 797 if (!hasFramePointer()) |
798 Offset += getStackAdjustment(); | 798 Offset += getStackAdjustment(); |
799 } | 799 } |
800 return typename Traits::Address( | 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset); |
801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); | |
802 } | 801 } |
803 | 802 |
804 /// Helper function for addProlog(). | 803 /// Helper function for addProlog(). |
805 /// | 804 /// |
806 /// This assumes Arg is an argument passed on the stack. This sets the frame | 805 /// This assumes Arg is an argument passed on the stack. This sets the frame |
807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 806 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
808 /// I64 arg that has been split into Lo and Hi components, it calls itself | 807 /// I64 arg that has been split into Lo and Hi components, it calls itself |
809 /// recursively on the components, taking care to handle Lo first because of the | 808 /// recursively on the components, taking care to handle Lo first because of the |
810 /// little-endian architecture. Lastly, this function generates an instruction | 809 /// little-endian architecture. Lastly, this function generates an instruction |
811 /// to copy Arg into its assigned register if applicable. | 810 /// to copy Arg into its assigned register if applicable. |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1040 Src1 /= 2; | 1039 Src1 /= 2; |
1041 } else { | 1040 } else { |
1042 return false; | 1041 return false; |
1043 } | 1042 } |
1044 } | 1043 } |
1045 // Lea optimization only works for i16 and i32 types, not i8. | 1044 // Lea optimization only works for i16 and i32 types, not i8. |
1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1045 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
1047 return false; | 1046 return false; |
1048 // Limit the number of lea/shl operations for a single multiply, to a | 1047 // Limit the number of lea/shl operations for a single multiply, to a |
1049 // somewhat arbitrary choice of 3. | 1048 // somewhat arbitrary choice of 3. |
1050 const uint32_t MaxOpsForOptimizedMul = 3; | 1049 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1051 if (CountOps > MaxOpsForOptimizedMul) | 1050 if (CountOps > MaxOpsForOptimizedMul) |
1052 return false; | 1051 return false; |
1053 _mov(T, Src0); | 1052 _mov(T, Src0); |
1054 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1053 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1055 for (uint32_t i = 0; i < Count9; ++i) { | 1054 for (uint32_t i = 0; i < Count9; ++i) { |
1056 const uint16_t Shift = 3; // log2(9-1) | 1055 constexpr uint16_t Shift = 3; // log2(9-1) |
1057 _lea(T, | 1056 _lea(T, |
1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1057 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1059 } | 1058 } |
1060 for (uint32_t i = 0; i < Count5; ++i) { | 1059 for (uint32_t i = 0; i < Count5; ++i) { |
1061 const uint16_t Shift = 2; // log2(5-1) | 1060 constexpr uint16_t Shift = 2; // log2(5-1) |
1062 _lea(T, | 1061 _lea(T, |
1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1064 } | 1063 } |
1065 for (uint32_t i = 0; i < Count3; ++i) { | 1064 for (uint32_t i = 0; i < Count3; ++i) { |
1066 const uint16_t Shift = 1; // log2(3-1) | 1065 constexpr uint16_t Shift = 1; // log2(3-1) |
1067 _lea(T, | 1066 _lea(T, |
1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1069 } | 1068 } |
1070 if (Count2) { | 1069 if (Count2) { |
1071 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1070 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
1072 } | 1071 } |
1073 if (Src1IsNegative) | 1072 if (Src1IsNegative) |
1074 _neg(T); | 1073 _neg(T); |
1075 _mov(Dest, T); | 1074 _mov(Dest, T); |
1076 return true; | 1075 return true; |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1208 } | 1207 } |
1209 } else { | 1208 } else { |
1210 // NON-CONSTANT CASES. | 1209 // NON-CONSTANT CASES. |
1211 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1210 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1212 typename Traits::Insts::Label *Label = | 1211 typename Traits::Insts::Label *Label = |
1213 Traits::Insts::Label::create(Func, this); | 1212 Traits::Insts::Label::create(Func, this); |
1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1213 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
1215 // t1:ecx = c.lo & 0xff | 1214 // t1:ecx = c.lo & 0xff |
1216 // t2 = b.lo | 1215 // t2 = b.lo |
1217 // t3 = b.hi | 1216 // t3 = b.hi |
1218 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1217 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1218 _mov(T_1, Src1Lo); |
1219 _mov(T_2, Src0Lo); | 1219 _mov(T_2, Src0Lo); |
1220 _mov(T_3, Src0Hi); | 1220 _mov(T_3, Src0Hi); |
1221 switch (Op) { | 1221 switch (Op) { |
1222 default: | 1222 default: |
1223 assert(0 && "non-shift op"); | 1223 assert(0 && "non-shift op"); |
1224 break; | 1224 break; |
1225 case InstArithmetic::Shl: { | 1225 case InstArithmetic::Shl: { |
1226 // a=b<<c ==> | 1226 // a=b<<c ==> |
1227 // t3 = shld t3, t2, t1 | 1227 // t3 = shld t3, t2, t1 |
1228 // t2 = shl t2, t1 | 1228 // t2 = shl t2, t1 |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1316 } | 1316 } |
1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1317 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
1318 // These x86-32 helper-call-involved instructions are lowered in this | 1318 // These x86-32 helper-call-involved instructions are lowered in this |
1319 // separate switch. This is because loOperand() and hiOperand() may insert | 1319 // separate switch. This is because loOperand() and hiOperand() may insert |
1320 // redundant instructions for constant blinding and pooling. Such redundant | 1320 // redundant instructions for constant blinding and pooling. Such redundant |
1321 // instructions will fail liveness analysis under -Om1 setting. And, | 1321 // instructions will fail liveness analysis under -Om1 setting. And, |
1322 // actually these arguments do not need to be processed with loOperand() | 1322 // actually these arguments do not need to be processed with loOperand() |
1323 // and hiOperand() to be used. | 1323 // and hiOperand() to be used. |
1324 switch (Inst->getOp()) { | 1324 switch (Inst->getOp()) { |
1325 case InstArithmetic::Udiv: { | 1325 case InstArithmetic::Udiv: { |
1326 const SizeT MaxSrcs = 2; | 1326 constexpr SizeT MaxSrcs = 2; |
1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1327 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
1328 Call->addArg(Inst->getSrc(0)); | 1328 Call->addArg(Inst->getSrc(0)); |
1329 Call->addArg(Inst->getSrc(1)); | 1329 Call->addArg(Inst->getSrc(1)); |
1330 lowerCall(Call); | 1330 lowerCall(Call); |
1331 return; | 1331 return; |
1332 } | 1332 } |
1333 case InstArithmetic::Sdiv: { | 1333 case InstArithmetic::Sdiv: { |
1334 const SizeT MaxSrcs = 2; | 1334 constexpr SizeT MaxSrcs = 2; |
1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | 1335 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); |
1336 Call->addArg(Inst->getSrc(0)); | 1336 Call->addArg(Inst->getSrc(0)); |
1337 Call->addArg(Inst->getSrc(1)); | 1337 Call->addArg(Inst->getSrc(1)); |
1338 lowerCall(Call); | 1338 lowerCall(Call); |
1339 return; | 1339 return; |
1340 } | 1340 } |
1341 case InstArithmetic::Urem: { | 1341 case InstArithmetic::Urem: { |
1342 const SizeT MaxSrcs = 2; | 1342 constexpr SizeT MaxSrcs = 2; |
1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | 1343 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); |
1344 Call->addArg(Inst->getSrc(0)); | 1344 Call->addArg(Inst->getSrc(0)); |
1345 Call->addArg(Inst->getSrc(1)); | 1345 Call->addArg(Inst->getSrc(1)); |
1346 lowerCall(Call); | 1346 lowerCall(Call); |
1347 return; | 1347 return; |
1348 } | 1348 } |
1349 case InstArithmetic::Srem: { | 1349 case InstArithmetic::Srem: { |
1350 const SizeT MaxSrcs = 2; | 1350 constexpr SizeT MaxSrcs = 2; |
1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | 1351 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); |
1352 Call->addArg(Inst->getSrc(0)); | 1352 Call->addArg(Inst->getSrc(0)); |
1353 Call->addArg(Inst->getSrc(1)); | 1353 Call->addArg(Inst->getSrc(1)); |
1354 lowerCall(Call); | 1354 lowerCall(Call); |
1355 return; | 1355 return; |
1356 } | 1356 } |
1357 default: | 1357 default: |
1358 break; | 1358 break; |
1359 } | 1359 } |
1360 | 1360 |
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1521 // pmuludq T1, Src1 | 1521 // pmuludq T1, Src1 |
1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1522 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
1523 // pmuludq T2, T3 | 1523 // pmuludq T2, T3 |
1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | 1524 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
1525 // shufps T1, T2, {0,2,0,2} | 1525 // shufps T1, T2, {0,2,0,2} |
1526 // pshufd T4, T1, {0,2,1,3} | 1526 // pshufd T4, T1, {0,2,1,3} |
1527 // movups Dest, T4 | 1527 // movups Dest, T4 |
1528 | 1528 |
1529 // Mask that directs pshufd to create a vector with entries | 1529 // Mask that directs pshufd to create a vector with entries |
1530 // Src[1, 0, 3, 0] | 1530 // Src[1, 0, 3, 0] |
1531 const unsigned Constant1030 = 0x31; | 1531 constexpr unsigned Constant1030 = 0x31; |
1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | 1532 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); |
1533 // Mask that directs shufps to create a vector with entries | 1533 // Mask that directs shufps to create a vector with entries |
1534 // Dest[0, 2], Src[0, 2] | 1534 // Dest[0, 2], Src[0, 2] |
1535 const unsigned Mask0202 = 0x88; | 1535 constexpr unsigned Mask0202 = 0x88; |
1536 // Mask that directs pshufd to create a vector with entries | 1536 // Mask that directs pshufd to create a vector with entries |
1537 // Src[0, 2, 1, 3] | 1537 // Src[0, 2, 1, 3] |
1538 const unsigned Mask0213 = 0xd8; | 1538 constexpr unsigned Mask0213 = 0xd8; |
1539 Variable *T1 = makeReg(IceType_v4i32); | 1539 Variable *T1 = makeReg(IceType_v4i32); |
1540 Variable *T2 = makeReg(IceType_v4i32); | 1540 Variable *T2 = makeReg(IceType_v4i32); |
1541 Variable *T3 = makeReg(IceType_v4i32); | 1541 Variable *T3 = makeReg(IceType_v4i32); |
1542 Variable *T4 = makeReg(IceType_v4i32); | 1542 Variable *T4 = makeReg(IceType_v4i32); |
1543 _movp(T1, Src0); | 1543 _movp(T1, Src0); |
1544 _pshufd(T2, Src0, Mask1030); | 1544 _pshufd(T2, Src0, Mask1030); |
1545 _pshufd(T3, Src1, Mask1030); | 1545 _pshufd(T3, Src1, Mask1030); |
1546 _pmuludq(T1, Src1); | 1546 _pmuludq(T1, Src1); |
1547 _pmuludq(T2, T3); | 1547 _pmuludq(T2, T3); |
1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1548 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1623 _mov(T, Src0); | 1623 _mov(T, Src0); |
1624 _sub(T, Src1); | 1624 _sub(T, Src1); |
1625 _mov(Dest, T); | 1625 _mov(Dest, T); |
1626 break; | 1626 break; |
1627 case InstArithmetic::Mul: | 1627 case InstArithmetic::Mul: |
1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1628 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1629 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
1630 return; | 1630 return; |
1631 } | 1631 } |
1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must | 1632 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
1633 // be in eax. | 1633 // be in al. |
1634 if (isByteSizedArithType(Dest->getType())) { | 1634 if (isByteSizedArithType(Dest->getType())) { |
1635 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1635 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1636 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1637 _imul(T, Src0 == Src1 ? T : Src1); | 1637 _imul(T, Src0 == Src1 ? T : Src1); |
1638 _mov(Dest, T); | 1638 _mov(Dest, T); |
1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1639 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1640 T = makeReg(Dest->getType()); | 1640 T = makeReg(Dest->getType()); |
1641 _imul_imm(T, Src0, ImmConst); | 1641 _imul_imm(T, Src0, ImmConst); |
1642 _mov(Dest, T); | 1642 _mov(Dest, T); |
1643 } else { | 1643 } else { |
1644 _mov(T, Src0); | 1644 _mov(T, Src0); |
1645 _imul(T, Src0 == Src1 ? T : Src1); | 1645 _imul(T, Src0 == Src1 ? T : Src1); |
1646 _mov(Dest, T); | 1646 _mov(Dest, T); |
1647 } | 1647 } |
1648 break; | 1648 break; |
1649 case InstArithmetic::Shl: | 1649 case InstArithmetic::Shl: |
1650 _mov(T, Src0); | 1650 _mov(T, Src0); |
1651 if (!llvm::isa<ConstantInteger32>(Src1)) | 1651 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1652 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1652 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1653 _mov(Cl, Src1); |
| 1654 Src1 = Cl; |
| 1655 } |
1653 _shl(T, Src1); | 1656 _shl(T, Src1); |
1654 _mov(Dest, T); | 1657 _mov(Dest, T); |
1655 break; | 1658 break; |
1656 case InstArithmetic::Lshr: | 1659 case InstArithmetic::Lshr: |
1657 _mov(T, Src0); | 1660 _mov(T, Src0); |
1658 if (!llvm::isa<ConstantInteger32>(Src1)) | 1661 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1659 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1662 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1663 _mov(Cl, Src1); |
| 1664 Src1 = Cl; |
| 1665 } |
1660 _shr(T, Src1); | 1666 _shr(T, Src1); |
1661 _mov(Dest, T); | 1667 _mov(Dest, T); |
1662 break; | 1668 break; |
1663 case InstArithmetic::Ashr: | 1669 case InstArithmetic::Ashr: |
1664 _mov(T, Src0); | 1670 _mov(T, Src0); |
1665 if (!llvm::isa<ConstantInteger32>(Src1)) | 1671 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1666 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1672 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1673 _mov(Cl, Src1); |
| 1674 Src1 = Cl; |
| 1675 } |
1667 _sar(T, Src1); | 1676 _sar(T, Src1); |
1668 _mov(Dest, T); | 1677 _mov(Dest, T); |
1669 break; | 1678 break; |
1670 case InstArithmetic::Udiv: | 1679 case InstArithmetic::Udiv: |
1671 // div and idiv are the few arithmetic operators that do not allow | 1680 // div and idiv are the few arithmetic operators that do not allow |
1672 // immediates as the operand. | 1681 // immediates as the operand. |
1673 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1682 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1674 if (isByteSizedArithType(Dest->getType())) { | 1683 if (isByteSizedArithType(Dest->getType())) { |
1675 // For 8-bit unsigned division we need to zero-extend al into ah. A mov | 1684 // For 8-bit unsigned division we need to zero-extend al into ah. A mov |
1676 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 | 1685 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 |
1677 // assembler refuses to encode %ah (encoding %spl with a REX prefix | 1686 // assembler refuses to encode %ah (encoding %spl with a REX prefix |
1678 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah | 1687 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah |
1679 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and | 1688 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and |
1680 // %d[lh], which means the X86 target lowering (and the register | 1689 // %d[lh], which means the X86 target lowering (and the register |
1681 // allocator) would have to be aware of this restriction. For now, we | 1690 // allocator) would have to be aware of this restriction. For now, we |
1682 // simply zero %eax completely, and move the dividend into %al. | 1691 // simply zero %eax completely, and move the dividend into %al. |
1683 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1692 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1684 Context.insert(InstFakeDef::create(Func, T_eax)); | 1693 Context.insert(InstFakeDef::create(Func, T_eax)); |
1685 _xor(T_eax, T_eax); | 1694 _xor(T_eax, T_eax); |
1686 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1695 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1687 _div(T, Src1, T); | 1696 _div(T, Src1, T); |
1688 _mov(Dest, T); | 1697 _mov(Dest, T); |
1689 Context.insert(InstFakeUse::create(Func, T_eax)); | 1698 Context.insert(InstFakeUse::create(Func, T_eax)); |
1690 } else { | 1699 } else { |
1691 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1700 Type Ty = Dest->getType(); |
1692 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1701 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
1693 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1702 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1703 switch (Ty) { |
| 1704 default: |
| 1705 llvm_unreachable("Bad type for udiv"); |
| 1706 // fallthrough |
| 1707 case IceType_i32: |
| 1708 break; |
| 1709 case IceType_i16: |
| 1710 Eax = Traits::RegisterSet::Reg_ax; |
| 1711 Edx = Traits::RegisterSet::Reg_dx; |
| 1712 break; |
| 1713 } |
| 1714 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1715 _mov(T, Src0, Eax); |
| 1716 _mov(T_edx, Zero, Edx); |
1694 _div(T, Src1, T_edx); | 1717 _div(T, Src1, T_edx); |
1695 _mov(Dest, T); | 1718 _mov(Dest, T); |
1696 } | 1719 } |
1697 break; | 1720 break; |
1698 case InstArithmetic::Sdiv: | 1721 case InstArithmetic::Sdiv: |
1699 // TODO(stichnot): Enable this after doing better performance and cross | 1722 // TODO(stichnot): Enable this after doing better performance and cross |
1700 // testing. | 1723 // testing. |
1701 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1724 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1702 // Optimize division by constant power of 2, but not for Om1 or O0, just | 1725 // Optimize division by constant power of 2, but not for Om1 or O0, just |
1703 // to keep things simple there. | 1726 // to keep things simple there. |
(...skipping 21 matching lines...) Expand all Loading... |
1725 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1748 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
1726 _add(T, Src0); | 1749 _add(T, Src0); |
1727 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1750 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
1728 } | 1751 } |
1729 _mov(Dest, T); | 1752 _mov(Dest, T); |
1730 return; | 1753 return; |
1731 } | 1754 } |
1732 } | 1755 } |
1733 } | 1756 } |
1734 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1757 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1735 if (isByteSizedArithType(Dest->getType())) { | 1758 switch (Type Ty = Dest->getType()) { |
| 1759 default: |
| 1760 llvm_unreachable("Bad type for sdiv"); |
| 1761 // fallthrough |
| 1762 case IceType_i32: |
| 1763 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1764 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1737 _cbwdq(T, T); | 1765 break; |
1738 _idiv(T, Src1, T); | 1766 case IceType_i16: |
1739 _mov(Dest, T); | 1767 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
1740 } else { | 1768 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
1741 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1769 break; |
1742 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1770 case IceType_i8: |
1743 _cbwdq(T_edx, T); | 1771 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
1744 _idiv(T, Src1, T_edx); | 1772 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1745 _mov(Dest, T); | 1773 break; |
1746 } | 1774 } |
| 1775 _cbwdq(T_edx, T); |
| 1776 _idiv(T, Src1, T_edx); |
| 1777 _mov(Dest, T); |
1747 break; | 1778 break; |
1748 case InstArithmetic::Urem: | 1779 case InstArithmetic::Urem: |
1749 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1780 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1750 if (isByteSizedArithType(Dest->getType())) { | 1781 if (isByteSizedArithType(Dest->getType())) { |
1751 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1782 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1752 Context.insert(InstFakeDef::create(Func, T_eax)); | 1783 Context.insert(InstFakeDef::create(Func, T_eax)); |
1753 _xor(T_eax, T_eax); | 1784 _xor(T_eax, T_eax); |
1754 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1785 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1755 _div(T, Src1, T); | 1786 _div(T, Src1, T); |
1756 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1787 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
1757 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1788 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
1758 // this ever becomes a problem we can introduce a pseudo rem instruction | 1789 // this ever becomes a problem we can introduce a pseudo rem instruction |
1759 // that returns the remainder in %al directly (and uses a mov for copying | 1790 // that returns the remainder in %al directly (and uses a mov for copying |
1760 // %ah to %al.) | 1791 // %ah to %al.) |
1761 static constexpr uint8_t AlSizeInBits = 8; | 1792 static constexpr uint8_t AlSizeInBits = 8; |
1762 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1793 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
1763 _mov(Dest, T); | 1794 _mov(Dest, T); |
1764 Context.insert(InstFakeUse::create(Func, T_eax)); | 1795 Context.insert(InstFakeUse::create(Func, T_eax)); |
1765 } else { | 1796 } else { |
1766 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1797 Type Ty = Dest->getType(); |
1767 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 1798 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1799 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1800 switch (Ty) { |
| 1801 default: |
| 1802 llvm_unreachable("Bad type for urem"); |
| 1803 // fallthrough |
| 1804 case IceType_i32: |
| 1805 break; |
| 1806 case IceType_i16: |
| 1807 Eax = Traits::RegisterSet::Reg_ax; |
| 1808 Edx = Traits::RegisterSet::Reg_dx; |
| 1809 break; |
| 1810 } |
| 1811 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1812 T_edx = makeReg(Dest->getType(), Edx); |
1768 _mov(T_edx, Zero); | 1813 _mov(T_edx, Zero); |
1769 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1814 _mov(T, Src0, Eax); |
1770 _div(T_edx, Src1, T); | 1815 _div(T_edx, Src1, T); |
1771 _mov(Dest, T_edx); | 1816 _mov(Dest, T_edx); |
1772 } | 1817 } |
1773 break; | 1818 break; |
1774 case InstArithmetic::Srem: | 1819 case InstArithmetic::Srem: |
1775 // TODO(stichnot): Enable this after doing better performance and cross | 1820 // TODO(stichnot): Enable this after doing better performance and cross |
1776 // testing. | 1821 // testing. |
1777 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1822 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1778 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 1823 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
1779 // keep things simple there. | 1824 // keep things simple there. |
(...skipping 26 matching lines...) Expand all Loading... |
1806 _add(T, Src0); | 1851 _add(T, Src0); |
1807 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
1808 _sub(T, Src0); | 1853 _sub(T, Src0); |
1809 _neg(T); | 1854 _neg(T); |
1810 _mov(Dest, T); | 1855 _mov(Dest, T); |
1811 return; | 1856 return; |
1812 } | 1857 } |
1813 } | 1858 } |
1814 } | 1859 } |
1815 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1816 if (isByteSizedArithType(Dest->getType())) { | 1861 switch (Type Ty = Dest->getType()) { |
1817 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1862 default: |
1818 // T is %al. | 1863 llvm_unreachable("Bad type for srem"); |
1819 _cbwdq(T, T); | 1864 // fallthrough |
1820 _idiv(T, Src1, T); | 1865 case IceType_i32: |
1821 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1866 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1822 Context.insert(InstFakeDef::create(Func, T_eax)); | |
1823 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | |
1824 // mov %ah, %al because it would make x86-64 codegen more complicated. If | |
1825 // this ever becomes a problem we can introduce a pseudo rem instruction | |
1826 // that returns the remainder in %al directly (and uses a mov for copying | |
1827 // %ah to %al.) | |
1828 static constexpr uint8_t AlSizeInBits = 8; | |
1829 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | |
1830 _mov(Dest, T); | |
1831 Context.insert(InstFakeUse::create(Func, T_eax)); | |
1832 } else { | |
1833 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | |
1834 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1867 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1835 _cbwdq(T_edx, T); | 1868 _cbwdq(T_edx, T); |
1836 _idiv(T_edx, Src1, T); | 1869 _idiv(T_edx, Src1, T); |
1837 _mov(Dest, T_edx); | 1870 _mov(Dest, T_edx); |
| 1871 break; |
| 1872 case IceType_i16: |
| 1873 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1874 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1875 _cbwdq(T_edx, T); |
| 1876 _idiv(T_edx, Src1, T); |
| 1877 _mov(Dest, T_edx); |
| 1878 break; |
| 1879 case IceType_i8: |
| 1880 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1881 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). |
| 1882 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah); |
| 1883 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1884 _cbwdq(T_edx, T); |
| 1885 _idiv(T_edx, Src1, T); |
| 1886 static constexpr uint8_t AlSizeInBits = 8; |
| 1887 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits)); |
| 1888 _mov(Dest, T_edx); |
| 1889 break; |
1838 } | 1890 } |
1839 break; | 1891 break; |
1840 case InstArithmetic::Fadd: | 1892 case InstArithmetic::Fadd: |
1841 _mov(T, Src0); | 1893 _mov(T, Src0); |
1842 _addss(T, Src1); | 1894 _addss(T, Src1); |
1843 _mov(Dest, T); | 1895 _mov(Dest, T); |
1844 break; | 1896 break; |
1845 case InstArithmetic::Fsub: | 1897 case InstArithmetic::Fsub: |
1846 _mov(T, Src0); | 1898 _mov(T, Src0); |
1847 _subss(T, Src1); | 1899 _subss(T, Src1); |
1848 _mov(Dest, T); | 1900 _mov(Dest, T); |
1849 break; | 1901 break; |
1850 case InstArithmetic::Fmul: | 1902 case InstArithmetic::Fmul: |
1851 _mov(T, Src0); | 1903 _mov(T, Src0); |
1852 _mulss(T, Src0 == Src1 ? T : Src1); | 1904 _mulss(T, Src0 == Src1 ? T : Src1); |
1853 _mov(Dest, T); | 1905 _mov(Dest, T); |
1854 break; | 1906 break; |
1855 case InstArithmetic::Fdiv: | 1907 case InstArithmetic::Fdiv: |
1856 _mov(T, Src0); | 1908 _mov(T, Src0); |
1857 _divss(T, Src1); | 1909 _divss(T, Src1); |
1858 _mov(Dest, T); | 1910 _mov(Dest, T); |
1859 break; | 1911 break; |
1860 case InstArithmetic::Frem: { | 1912 case InstArithmetic::Frem: { |
1861 const SizeT MaxSrcs = 2; | 1913 constexpr SizeT MaxSrcs = 2; |
1862 Type Ty = Dest->getType(); | 1914 Type Ty = Dest->getType(); |
1863 InstCall *Call = makeHelperCall( | 1915 InstCall *Call = makeHelperCall( |
1864 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 1916 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
1865 Call->addArg(Src0); | 1917 Call->addArg(Src0); |
1866 Call->addArg(Src1); | 1918 Call->addArg(Src1); |
1867 return lowerCall(Call); | 1919 return lowerCall(Call); |
1868 } | 1920 } |
1869 } | 1921 } |
1870 } | 1922 } |
1871 | 1923 |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2106 if (isVectorType(Dest->getType())) { | 2158 if (isVectorType(Dest->getType())) { |
2107 assert(Dest->getType() == IceType_v4i32 && | 2159 assert(Dest->getType() == IceType_v4i32 && |
2108 Inst->getSrc(0)->getType() == IceType_v4f32); | 2160 Inst->getSrc(0)->getType() == IceType_v4f32); |
2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2161 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2110 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2162 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2111 Src0RM = legalizeToReg(Src0RM); | 2163 Src0RM = legalizeToReg(Src0RM); |
2112 Variable *T = makeReg(Dest->getType()); | 2164 Variable *T = makeReg(Dest->getType()); |
2113 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2165 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
2114 _movp(Dest, T); | 2166 _movp(Dest, T); |
2115 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2167 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
2116 const SizeT MaxSrcs = 1; | 2168 constexpr SizeT MaxSrcs = 1; |
2117 Type SrcType = Inst->getSrc(0)->getType(); | 2169 Type SrcType = Inst->getSrc(0)->getType(); |
2118 InstCall *Call = | 2170 InstCall *Call = |
2119 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2171 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
2120 : H_fptosi_f64_i64, | 2172 : H_fptosi_f64_i64, |
2121 Dest, MaxSrcs); | 2173 Dest, MaxSrcs); |
2122 Call->addArg(Inst->getSrc(0)); | 2174 Call->addArg(Inst->getSrc(0)); |
2123 lowerCall(Call); | 2175 lowerCall(Call); |
2124 } else { | 2176 } else { |
2125 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2177 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2126 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2178 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
(...skipping 10 matching lines...) Expand all Loading... |
2137 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2189 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
2138 if (Dest->getType() == IceType_i1) | 2190 if (Dest->getType() == IceType_i1) |
2139 _and(T_2, Ctx->getConstantInt1(1)); | 2191 _and(T_2, Ctx->getConstantInt1(1)); |
2140 _mov(Dest, T_2); | 2192 _mov(Dest, T_2); |
2141 } | 2193 } |
2142 break; | 2194 break; |
2143 case InstCast::Fptoui: | 2195 case InstCast::Fptoui: |
2144 if (isVectorType(Dest->getType())) { | 2196 if (isVectorType(Dest->getType())) { |
2145 assert(Dest->getType() == IceType_v4i32 && | 2197 assert(Dest->getType() == IceType_v4i32 && |
2146 Inst->getSrc(0)->getType() == IceType_v4f32); | 2198 Inst->getSrc(0)->getType() == IceType_v4f32); |
2147 const SizeT MaxSrcs = 1; | 2199 constexpr SizeT MaxSrcs = 1; |
2148 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2200 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
2149 Call->addArg(Inst->getSrc(0)); | 2201 Call->addArg(Inst->getSrc(0)); |
2150 lowerCall(Call); | 2202 lowerCall(Call); |
2151 } else if (Dest->getType() == IceType_i64 || | 2203 } else if (Dest->getType() == IceType_i64 || |
2152 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2204 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
2153 // Use a helper for both x86-32 and x86-64. | 2205 // Use a helper for both x86-32 and x86-64. |
2154 const SizeT MaxSrcs = 1; | 2206 constexpr SizeT MaxSrcs = 1; |
2155 Type DestType = Dest->getType(); | 2207 Type DestType = Dest->getType(); |
2156 Type SrcType = Inst->getSrc(0)->getType(); | 2208 Type SrcType = Inst->getSrc(0)->getType(); |
2157 IceString TargetString; | 2209 IceString TargetString; |
2158 if (Traits::Is64Bit) { | 2210 if (Traits::Is64Bit) { |
2159 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
2160 : H_fptoui_f64_i64; | 2212 : H_fptoui_f64_i64; |
2161 } else if (isInt32Asserting32Or64(DestType)) { | 2213 } else if (isInt32Asserting32Or64(DestType)) { |
2162 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
2163 : H_fptoui_f64_i32; | 2215 : H_fptoui_f64_i32; |
2164 } else { | 2216 } else { |
(...skipping 28 matching lines...) Expand all Loading... |
2193 assert(Dest->getType() == IceType_v4f32 && | 2245 assert(Dest->getType() == IceType_v4f32 && |
2194 Inst->getSrc(0)->getType() == IceType_v4i32); | 2246 Inst->getSrc(0)->getType() == IceType_v4i32); |
2195 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2247 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2196 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2248 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2197 Src0RM = legalizeToReg(Src0RM); | 2249 Src0RM = legalizeToReg(Src0RM); |
2198 Variable *T = makeReg(Dest->getType()); | 2250 Variable *T = makeReg(Dest->getType()); |
2199 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2251 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
2200 _movp(Dest, T); | 2252 _movp(Dest, T); |
2201 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2253 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
2202 // Use a helper for x86-32. | 2254 // Use a helper for x86-32. |
2203 const SizeT MaxSrcs = 1; | 2255 constexpr SizeT MaxSrcs = 1; |
2204 Type DestType = Dest->getType(); | 2256 Type DestType = Dest->getType(); |
2205 InstCall *Call = | 2257 InstCall *Call = |
2206 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2258 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
2207 : H_sitofp_i64_f64, | 2259 : H_sitofp_i64_f64, |
2208 Dest, MaxSrcs); | 2260 Dest, MaxSrcs); |
2209 // TODO: Call the correct compiler-rt helper function. | 2261 // TODO: Call the correct compiler-rt helper function. |
2210 Call->addArg(Inst->getSrc(0)); | 2262 Call->addArg(Inst->getSrc(0)); |
2211 lowerCall(Call); | 2263 lowerCall(Call); |
2212 return; | 2264 return; |
2213 } else { | 2265 } else { |
(...skipping 14 matching lines...) Expand all Loading... |
2228 _movsx(T_1, Src0RM); | 2280 _movsx(T_1, Src0RM); |
2229 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2281 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
2230 _mov(Dest, T_2); | 2282 _mov(Dest, T_2); |
2231 } | 2283 } |
2232 break; | 2284 break; |
2233 case InstCast::Uitofp: { | 2285 case InstCast::Uitofp: { |
2234 Operand *Src0 = Inst->getSrc(0); | 2286 Operand *Src0 = Inst->getSrc(0); |
2235 if (isVectorType(Src0->getType())) { | 2287 if (isVectorType(Src0->getType())) { |
2236 assert(Dest->getType() == IceType_v4f32 && | 2288 assert(Dest->getType() == IceType_v4f32 && |
2237 Src0->getType() == IceType_v4i32); | 2289 Src0->getType() == IceType_v4i32); |
2238 const SizeT MaxSrcs = 1; | 2290 constexpr SizeT MaxSrcs = 1; |
2239 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2291 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
2240 Call->addArg(Src0); | 2292 Call->addArg(Src0); |
2241 lowerCall(Call); | 2293 lowerCall(Call); |
2242 } else if (Src0->getType() == IceType_i64 || | 2294 } else if (Src0->getType() == IceType_i64 || |
2243 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2295 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
2244 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 2296 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
2245 // x86-32. | 2297 // x86-32. |
2246 const SizeT MaxSrcs = 1; | 2298 constexpr SizeT MaxSrcs = 1; |
2247 Type DestType = Dest->getType(); | 2299 Type DestType = Dest->getType(); |
2248 IceString TargetString; | 2300 IceString TargetString; |
2249 if (isInt32Asserting32Or64(Src0->getType())) { | 2301 if (isInt32Asserting32Or64(Src0->getType())) { |
2250 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2302 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
2251 : H_uitofp_i32_f64; | 2303 : H_uitofp_i32_f64; |
2252 } else { | 2304 } else { |
2253 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2305 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
2254 : H_uitofp_i64_f64; | 2306 : H_uitofp_i64_f64; |
2255 } | 2307 } |
2256 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2308 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2452 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2504 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
2453 ConstantInteger32 *ElementIndex = | 2505 ConstantInteger32 *ElementIndex = |
2454 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2506 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
2455 // Only constant indices are allowed in PNaCl IR. | 2507 // Only constant indices are allowed in PNaCl IR. |
2456 assert(ElementIndex); | 2508 assert(ElementIndex); |
2457 | 2509 |
2458 unsigned Index = ElementIndex->getValue(); | 2510 unsigned Index = ElementIndex->getValue(); |
2459 Type Ty = SourceVectNotLegalized->getType(); | 2511 Type Ty = SourceVectNotLegalized->getType(); |
2460 Type ElementTy = typeElementType(Ty); | 2512 Type ElementTy = typeElementType(Ty); |
2461 Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 2513 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
2462 Variable *ExtractedElementR = makeReg(InVectorElementTy); | |
2463 | 2514 |
2464 // TODO(wala): Determine the best lowering sequences for each type. | 2515 // TODO(wala): Determine the best lowering sequences for each type. |
2465 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2516 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
2466 InstructionSet >= Traits::SSE4_1; | 2517 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32); |
2467 if (CanUsePextr && Ty != IceType_v4f32) { | 2518 Variable *ExtractedElementR = |
2468 // Use pextrb, pextrw, or pextrd. | 2519 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy); |
| 2520 if (CanUsePextr) { |
| 2521 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper |
| 2522 // bits of the destination register, so we represent this by always |
| 2523 // extracting into an i32 register. The _mov into Dest below will do |
| 2524 // truncation as necessary. |
2469 Constant *Mask = Ctx->getConstantInt32(Index); | 2525 Constant *Mask = Ctx->getConstantInt32(Index); |
2470 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); | 2526 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); |
2471 _pextr(ExtractedElementR, SourceVectR, Mask); | 2527 _pextr(ExtractedElementR, SourceVectR, Mask); |
2472 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2528 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2473 // Use pshufd and movd/movss. | 2529 // Use pshufd and movd/movss. |
2474 Variable *T = nullptr; | 2530 Variable *T = nullptr; |
2475 if (Index) { | 2531 if (Index) { |
2476 // The shuffle only needs to occur if the element to be extracted is not | 2532 // The shuffle only needs to occur if the element to be extracted is not |
2477 // at the lowest index. | 2533 // at the lowest index. |
2478 Constant *Mask = Ctx->getConstantInt32(Index); | 2534 Constant *Mask = Ctx->getConstantInt32(Index); |
(...skipping 492 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2971 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3027 // Use insertps, pinsrb, pinsrw, or pinsrd. |
2972 Operand *ElementRM = | 3028 Operand *ElementRM = |
2973 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3029 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
2974 Operand *SourceVectRM = | 3030 Operand *SourceVectRM = |
2975 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3031 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2976 Variable *T = makeReg(Ty); | 3032 Variable *T = makeReg(Ty); |
2977 _movp(T, SourceVectRM); | 3033 _movp(T, SourceVectRM); |
2978 if (Ty == IceType_v4f32) | 3034 if (Ty == IceType_v4f32) |
2979 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3035 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
2980 else | 3036 else |
| 3037 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source |
| 3038 // operand is a register, it must be a full r32 register like eax, and not |
| 3039 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates |
| 3040 // for the use of r16 and r8 by converting them through getBaseReg(), |
| 3041 // while emitIAS() validates that the original and base register encodings |
| 3042 // are the same. But for an "interior" register like ah, it should |
| 3043 // probably be copied into an r32 via movzx so that the types work out. |
2981 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3044 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
2982 _movp(Inst->getDest(), T); | 3045 _movp(Inst->getDest(), T); |
2983 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 3046 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2984 // Use shufps or movss. | 3047 // Use shufps or movss. |
2985 Variable *ElementR = nullptr; | 3048 Variable *ElementR = nullptr; |
2986 Operand *SourceVectRM = | 3049 Operand *SourceVectRM = |
2987 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3050 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2988 | 3051 |
2989 if (InVectorElementTy == IceType_f32) { | 3052 if (InVectorElementTy == IceType_f32) { |
2990 // ElementR will be in an XMM register since it is floating point. | 3053 // ElementR will be in an XMM register since it is floating point. |
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3305 // well-defined value. | 3368 // well-defined value. |
3306 Operand *Val = legalize(Instr->getArg(0)); | 3369 Operand *Val = legalize(Instr->getArg(0)); |
3307 Operand *FirstVal; | 3370 Operand *FirstVal; |
3308 Operand *SecondVal = nullptr; | 3371 Operand *SecondVal = nullptr; |
3309 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3372 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3310 FirstVal = loOperand(Val); | 3373 FirstVal = loOperand(Val); |
3311 SecondVal = hiOperand(Val); | 3374 SecondVal = hiOperand(Val); |
3312 } else { | 3375 } else { |
3313 FirstVal = Val; | 3376 FirstVal = Val; |
3314 } | 3377 } |
3315 const bool IsCttz = false; | 3378 constexpr bool IsCttz = false; |
3316 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3379 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3317 SecondVal); | 3380 SecondVal); |
3318 return; | 3381 return; |
3319 } | 3382 } |
3320 case Intrinsics::Cttz: { | 3383 case Intrinsics::Cttz: { |
3321 // The "is zero undef" parameter is ignored and we always return a | 3384 // The "is zero undef" parameter is ignored and we always return a |
3322 // well-defined value. | 3385 // well-defined value. |
3323 Operand *Val = legalize(Instr->getArg(0)); | 3386 Operand *Val = legalize(Instr->getArg(0)); |
3324 Operand *FirstVal; | 3387 Operand *FirstVal; |
3325 Operand *SecondVal = nullptr; | 3388 Operand *SecondVal = nullptr; |
3326 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3389 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3327 FirstVal = hiOperand(Val); | 3390 FirstVal = hiOperand(Val); |
3328 SecondVal = loOperand(Val); | 3391 SecondVal = loOperand(Val); |
3329 } else { | 3392 } else { |
3330 FirstVal = Val; | 3393 FirstVal = Val; |
3331 } | 3394 } |
3332 const bool IsCttz = true; | 3395 constexpr bool IsCttz = true; |
3333 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3396 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3334 SecondVal); | 3397 SecondVal); |
3335 return; | 3398 return; |
3336 } | 3399 } |
3337 case Intrinsics::Fabs: { | 3400 case Intrinsics::Fabs: { |
3338 Operand *Src = legalize(Instr->getArg(0)); | 3401 Operand *Src = legalize(Instr->getArg(0)); |
3339 Type Ty = Src->getType(); | 3402 Type Ty = Src->getType(); |
3340 Variable *Dest = Instr->getDest(); | 3403 Variable *Dest = Instr->getDest(); |
3341 Variable *T = makeVectorOfFabsMask(Ty); | 3404 Variable *T = makeVectorOfFabsMask(Ty); |
3342 // The pand instruction operates on an m128 memory operand, so if Src is an | 3405 // The pand instruction operates on an m128 memory operand, so if Src is an |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3420 Func->setError("Should not be lowering UnknownIntrinsic"); | 3483 Func->setError("Should not be lowering UnknownIntrinsic"); |
3421 return; | 3484 return; |
3422 } | 3485 } |
3423 return; | 3486 return; |
3424 } | 3487 } |
3425 | 3488 |
/// Lowers an atomic compare-and-exchange on the memory location addressed by
/// Ptr. (Each row below is a side-by-side diff row: OLD revision left of the
/// first '|', NEW revision right of it; these notes describe the NEW column.)
///
/// Two shapes are emitted:
///  - On a non-64-bit target with an i64 operand: Expected is split into
///    eax (lo) / edx (hi), Desired into ebx (lo) / ecx (hi), a locked
///    _cmpxchg8b is emitted on the memory operand, and eax / edx are then
///    copied into the lo / hi halves of DestPrev.
///  - Otherwise: Expected is moved into the eax-family register matching its
///    type (eax for i32, ax for i16, al for i8), Desired is legalized into a
///    register, a locked _cmpxchg is emitted, and that eax-family register is
///    copied into DestPrev.
///
/// \param DestPrev  variable that receives the contents of eax (edx:eax for
///                  the 8-byte form) after the cmpxchg instruction.
/// \param Ptr       address operand; turned into an X86OperandMem via
///                  formMemoryOperand() using Expected's type.
/// \param Expected  comparison value; its type selects the lowering shape.
/// \param Desired   replacement value passed to the cmpxchg instruction.
3426 template <class Machine> | 3489 template <class Machine> |
3427 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3490 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
3428 Operand *Ptr, Operand *Expected, | 3491 Operand *Ptr, Operand *Expected, |
3429 Operand *Desired) { | 3492 Operand *Desired) { |
3430 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { | 3493 Type Ty = Expected->getType(); |
| 3494 if (!Traits::Is64Bit && Ty == IceType_i64) { |
3431 // Reserve the pre-colored registers first, before adding any more | 3495 // Reserve the pre-colored registers first, before adding any more |
3432 // infinite-weight variables from formMemoryOperand's legalization. | 3496 // infinite-weight variables from formMemoryOperand's legalization. |
3433 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3497 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
3434 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3498 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
3435 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3499 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
3436 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3500 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
3437 _mov(T_eax, loOperand(Expected)); | 3501 _mov(T_eax, loOperand(Expected)); |
3438 _mov(T_edx, hiOperand(Expected)); | 3502 _mov(T_edx, hiOperand(Expected)); |
3439 _mov(T_ebx, loOperand(Desired)); | 3503 _mov(T_ebx, loOperand(Desired)); |
3440 _mov(T_ecx, hiOperand(Desired)); | 3504 _mov(T_ecx, hiOperand(Desired)); |
3441 typename Traits::X86OperandMem *Addr = | 3505 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3442 formMemoryOperand(Ptr, Expected->getType()); | 3506 constexpr bool Locked = true; |
3443 const bool Locked = true; | |
3444 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3507 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3445 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3508 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3446 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3509 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3447 _mov(DestLo, T_eax); | 3510 _mov(DestLo, T_eax); |
3448 _mov(DestHi, T_edx); | 3511 _mov(DestHi, T_edx); |
3449 return; | 3512 return; |
3450 } | 3513 } |
// The OLD column always pinned Reg_eax with Expected's type; the NEW column
// selects the eax/ax/al register whose width matches Ty before pinning it.
// NOTE(review): Eax is declared without an initializer and is assigned only in
// the i32/i16/i8 cases. When Traits::Is64Bit is true and Ty is IceType_i64 the
// branch above is skipped and the switch has no IceType_i64 case, so control
// reaches the default's llvm_unreachable -- confirm whether 64-bit operands
// are expected to reach this path.
3451 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); | 3514 int32_t Eax; |
| 3515 switch (Ty) { |
| 3516 default: |
| 3517 llvm_unreachable("Bad type for cmpxchg"); |
| 3518 // fallthrough |
| 3519 case IceType_i32: |
| 3520 Eax = Traits::RegisterSet::Reg_eax; |
| 3521 break; |
| 3522 case IceType_i16: |
| 3523 Eax = Traits::RegisterSet::Reg_ax; |
| 3524 break; |
| 3525 case IceType_i8: |
| 3526 Eax = Traits::RegisterSet::Reg_al; |
| 3527 break; |
| 3528 } |
| 3529 Variable *T_eax = makeReg(Ty, Eax); |
3452 _mov(T_eax, Expected); | 3530 _mov(T_eax, Expected); |
3453 typename Traits::X86OperandMem *Addr = | 3531 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3454 formMemoryOperand(Ptr, Expected->getType()); | |
3455 Variable *DesiredReg = legalizeToReg(Desired); | 3532 Variable *DesiredReg = legalizeToReg(Desired); |
3456 const bool Locked = true; | 3533 constexpr bool Locked = true; |
3457 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3534 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
3458 _mov(DestPrev, T_eax); | 3535 _mov(DestPrev, T_eax); |
3459 } | 3536 } |
3460 | 3537 |
3461 template <class Machine> | 3538 template <class Machine> |
3462 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3539 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
3463 Operand *PtrToMem, | 3540 Operand *PtrToMem, |
3464 Operand *Expected, | 3541 Operand *Expected, |
3465 Operand *Desired) { | 3542 Operand *Desired) { |
3466 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3543 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3548 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3625 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3549 // All the fall-through paths must set this to true, but use this | 3626 // All the fall-through paths must set this to true, but use this |
3550 // for asserting. | 3627 // for asserting. |
3551 NeedsCmpxchg = true; | 3628 NeedsCmpxchg = true; |
3552 Op_Lo = &TargetX86Base<Machine>::_add; | 3629 Op_Lo = &TargetX86Base<Machine>::_add; |
3553 Op_Hi = &TargetX86Base<Machine>::_adc; | 3630 Op_Hi = &TargetX86Base<Machine>::_adc; |
3554 break; | 3631 break; |
3555 } | 3632 } |
3556 typename Traits::X86OperandMem *Addr = | 3633 typename Traits::X86OperandMem *Addr = |
3557 formMemoryOperand(Ptr, Dest->getType()); | 3634 formMemoryOperand(Ptr, Dest->getType()); |
3558 const bool Locked = true; | 3635 constexpr bool Locked = true; |
3559 Variable *T = nullptr; | 3636 Variable *T = nullptr; |
3560 _mov(T, Val); | 3637 _mov(T, Val); |
3561 _xadd(Addr, T, Locked); | 3638 _xadd(Addr, T, Locked); |
3562 _mov(Dest, T); | 3639 _mov(Dest, T); |
3563 return; | 3640 return; |
3564 } | 3641 } |
3565 case Intrinsics::AtomicSub: { | 3642 case Intrinsics::AtomicSub: { |
3566 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3643 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3567 NeedsCmpxchg = true; | 3644 NeedsCmpxchg = true; |
3568 Op_Lo = &TargetX86Base<Machine>::_sub; | 3645 Op_Lo = &TargetX86Base<Machine>::_sub; |
3569 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3646 Op_Hi = &TargetX86Base<Machine>::_sbb; |
3570 break; | 3647 break; |
3571 } | 3648 } |
3572 typename Traits::X86OperandMem *Addr = | 3649 typename Traits::X86OperandMem *Addr = |
3573 formMemoryOperand(Ptr, Dest->getType()); | 3650 formMemoryOperand(Ptr, Dest->getType()); |
3574 const bool Locked = true; | 3651 constexpr bool Locked = true; |
3575 Variable *T = nullptr; | 3652 Variable *T = nullptr; |
3576 _mov(T, Val); | 3653 _mov(T, Val); |
3577 _neg(T); | 3654 _neg(T); |
3578 _xadd(Addr, T, Locked); | 3655 _xadd(Addr, T, Locked); |
3579 _mov(Dest, T); | 3656 _mov(Dest, T); |
3580 return; | 3657 return; |
3581 } | 3658 } |
3582 case Intrinsics::AtomicOr: | 3659 case Intrinsics::AtomicOr: |
3583 // TODO(jvoung): If Dest is null or dead, then some of these | 3660 // TODO(jvoung): If Dest is null or dead, then some of these |
3584 // operations do not need an "exchange", but just a locked op. | 3661 // operations do not need an "exchange", but just a locked op. |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3672 _mov(T_ecx, T_edx); | 3749 _mov(T_ecx, T_edx); |
3673 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3750 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
3674 } else { | 3751 } else { |
3675 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3752 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
3676 // It just needs the Val loaded into ebx and ecx. | 3753 // It just needs the Val loaded into ebx and ecx. |
3677 // That can also be done before the loop. | 3754 // That can also be done before the loop. |
3678 _mov(T_ebx, loOperand(Val)); | 3755 _mov(T_ebx, loOperand(Val)); |
3679 _mov(T_ecx, hiOperand(Val)); | 3756 _mov(T_ecx, hiOperand(Val)); |
3680 Context.insert(Label); | 3757 Context.insert(Label); |
3681 } | 3758 } |
3682 const bool Locked = true; | 3759 constexpr bool Locked = true; |
3683 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3760 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3684 _br(Traits::Cond::Br_ne, Label); | 3761 _br(Traits::Cond::Br_ne, Label); |
3685 if (!IsXchg8b) { | 3762 if (!IsXchg8b) { |
3686 // If Val is a variable, model the extended live range of Val through | 3763 // If Val is a variable, model the extended live range of Val through |
3687 // the end of the loop, since it will be re-used by the loop. | 3764 // the end of the loop, since it will be re-used by the loop. |
3688 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3765 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3689 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3766 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
3690 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3767 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
3691 Context.insert(InstFakeUse::create(Func, ValLo)); | 3768 Context.insert(InstFakeUse::create(Func, ValLo)); |
3692 Context.insert(InstFakeUse::create(Func, ValHi)); | 3769 Context.insert(InstFakeUse::create(Func, ValHi)); |
3693 } | 3770 } |
3694 } else { | 3771 } else { |
3695 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3772 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
3696 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3773 Context.insert(InstFakeUse::create(Func, T_ebx)); |
3697 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3774 Context.insert(InstFakeUse::create(Func, T_ecx)); |
3698 } | 3775 } |
3699 // The address base (if any) is also reused in the loop. | 3776 // The address base (if any) is also reused in the loop. |
3700 if (Variable *Base = Addr->getBase()) | 3777 if (Variable *Base = Addr->getBase()) |
3701 Context.insert(InstFakeUse::create(Func, Base)); | 3778 Context.insert(InstFakeUse::create(Func, Base)); |
3702 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3779 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3780 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3704 _mov(DestLo, T_eax); | 3781 _mov(DestLo, T_eax); |
3705 _mov(DestHi, T_edx); | 3782 _mov(DestHi, T_edx); |
3706 return; | 3783 return; |
3707 } | 3784 } |
3708 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3785 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3709 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); | 3786 int32_t Eax; |
| 3787 switch (Ty) { |
| 3788 default: |
| 3789 llvm_unreachable("Bad type for atomicRMW"); |
| 3790 // fallthrough |
| 3791 case IceType_i32: |
| 3792 Eax = Traits::RegisterSet::Reg_eax; |
| 3793 break; |
| 3794 case IceType_i16: |
| 3795 Eax = Traits::RegisterSet::Reg_ax; |
| 3796 break; |
| 3797 case IceType_i8: |
| 3798 Eax = Traits::RegisterSet::Reg_al; |
| 3799 break; |
| 3800 } |
| 3801 Variable *T_eax = makeReg(Ty, Eax); |
3710 _mov(T_eax, Addr); | 3802 _mov(T_eax, Addr); |
3711 typename Traits::Insts::Label *Label = | 3803 typename Traits::Insts::Label *Label = |
3712 Traits::Insts::Label::create(Func, this); | 3804 Traits::Insts::Label::create(Func, this); |
3713 Context.insert(Label); | 3805 Context.insert(Label); |
3714 // We want to pick a different register for T than Eax, so don't use | 3806 // We want to pick a different register for T than Eax, so don't use |
3715 // _mov(T == nullptr, T_eax). | 3807 // _mov(T == nullptr, T_eax). |
3716 Variable *T = makeReg(Ty); | 3808 Variable *T = makeReg(Ty); |
3717 _mov(T, T_eax); | 3809 _mov(T, T_eax); |
3718 (this->*Op_Lo)(T, Val); | 3810 (this->*Op_Lo)(T, Val); |
3719 const bool Locked = true; | 3811 constexpr bool Locked = true; |
3720 _cmpxchg(Addr, T_eax, T, Locked); | 3812 _cmpxchg(Addr, T_eax, T, Locked); |
3721 _br(Traits::Cond::Br_ne, Label); | 3813 _br(Traits::Cond::Br_ne, Label); |
3722 // If Val is a variable, model the extended live range of Val through | 3814 // If Val is a variable, model the extended live range of Val through |
3723 // the end of the loop, since it will be re-used by the loop. | 3815 // the end of the loop, since it will be re-used by the loop. |
3724 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3816 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3725 Context.insert(InstFakeUse::create(Func, ValVar)); | 3817 Context.insert(InstFakeUse::create(Func, ValVar)); |
3726 } | 3818 } |
3727 // The address base (if any) is also reused in the loop. | 3819 // The address base (if any) is also reused in the loop. |
3728 if (Variable *Base = Addr->getBase()) | 3820 if (Variable *Base = Addr->getBase()) |
3729 Context.insert(InstFakeUse::create(Func, Base)); | 3821 Context.insert(InstFakeUse::create(Func, Base)); |
(...skipping 1357 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5087 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 5179 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
5088 Ty == IceType_v16i8); | 5180 Ty == IceType_v16i8); |
5089 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 5181 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
5090 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 5182 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
5091 SizeT Shift = | 5183 SizeT Shift = |
5092 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 5184 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
5093 _psll(Reg, Ctx->getConstantInt8(Shift)); | 5185 _psll(Reg, Ctx->getConstantInt8(Shift)); |
5094 return Reg; | 5186 return Reg; |
5095 } else { | 5187 } else { |
5096 // SSE has no left shift operation for vectors of 8 bit integers. | 5188 // SSE has no left shift operation for vectors of 8 bit integers. |
5097 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 5189 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
5098 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 5190 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
5099 Variable *Reg = makeReg(Ty, RegNum); | 5191 Variable *Reg = makeReg(Ty, RegNum); |
5100 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 5192 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
5101 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 5193 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
5102 return Reg; | 5194 return Reg; |
5103 } | 5195 } |
5104 } | 5196 } |
5105 | 5197 |
5106 /// Construct a mask in a register that can be and'ed with a floating-point | 5198 /// Construct a mask in a register that can be and'ed with a floating-point |
5107 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 5199 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
(...skipping 13 matching lines...) Expand all Loading... |
5121 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5213 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
5122 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 5214 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
5123 uint32_t Offset) { | 5215 uint32_t Offset) { |
5124 // Ensure that Slot is a stack slot. | 5216 // Ensure that Slot is a stack slot. |
5125 assert(Slot->mustNotHaveReg()); | 5217 assert(Slot->mustNotHaveReg()); |
5126 assert(Slot->getRegNum() == Variable::NoRegister); | 5218 assert(Slot->getRegNum() == Variable::NoRegister); |
5127 // Compute the location of Slot in memory. | 5219 // Compute the location of Slot in memory. |
5128 // TODO(wala,stichnot): lea should not | 5220 // TODO(wala,stichnot): lea should not |
5129 // be required. The address of the stack slot is known at compile time | 5221 // be required. The address of the stack slot is known at compile time |
5130 // (although not until after addProlog()). | 5222 // (although not until after addProlog()). |
5131 const Type PointerType = IceType_i32; | 5223 constexpr Type PointerType = IceType_i32; |
5132 Variable *Loc = makeReg(PointerType); | 5224 Variable *Loc = makeReg(PointerType); |
5133 _lea(Loc, Slot); | 5225 _lea(Loc, Slot); |
5134 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5226 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
5135 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5227 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
5136 } | 5228 } |
5137 | 5229 |
5138 /// Helper for legalize() to emit the right code to lower an operand to a | 5230 /// Helper for legalize() to emit the right code to lower an operand to a |
5139 /// register of the appropriate type. | 5231 /// register of the appropriate type. |
5140 template <class Machine> | 5232 template <class Machine> |
5141 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5233 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5174 if (Subst->mustHaveReg() && !Subst->hasReg()) { | 5266 if (Subst->mustHaveReg() && !Subst->hasReg()) { |
5175 // At this point we know the substitution will have a register. | 5267 // At this point we know the substitution will have a register. |
5176 if (From->getType() == Subst->getType()) { | 5268 if (From->getType() == Subst->getType()) { |
5177 // At this point we know the substitution's register is compatible. | 5269 // At this point we know the substitution's register is compatible. |
5178 return Subst; | 5270 return Subst; |
5179 } | 5271 } |
5180 } | 5272 } |
5181 } | 5273 } |
5182 } | 5274 } |
5183 | 5275 |
5184 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5276 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
5185 // Before doing anything with a Mem operand, we need to ensure that the | 5277 // Before doing anything with a Mem operand, we need to ensure that the |
5186 // Base and Index components are in physical registers. | 5278 // Base and Index components are in physical registers. |
5187 Variable *Base = Mem->getBase(); | 5279 Variable *Base = Mem->getBase(); |
5188 Variable *Index = Mem->getIndex(); | 5280 Variable *Index = Mem->getIndex(); |
5189 Variable *RegBase = nullptr; | 5281 Variable *RegBase = nullptr; |
5190 Variable *RegIndex = nullptr; | 5282 Variable *RegIndex = nullptr; |
5191 if (Base) { | 5283 if (Base) { |
5192 RegBase = legalizeToReg(Base); | 5284 RegBase = legalizeToReg(Base); |
5193 } | 5285 } |
5194 if (Index) { | 5286 if (Index) { |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5252 // Immediate specifically not allowed | 5344 // Immediate specifically not allowed |
5253 NeedsReg = true; | 5345 NeedsReg = true; |
5254 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 5346 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
5255 // On x86, FP constants are lowered to mem operands. | 5347 // On x86, FP constants are lowered to mem operands. |
5256 NeedsReg = true; | 5348 NeedsReg = true; |
5257 if (NeedsReg) { | 5349 if (NeedsReg) { |
5258 From = copyToReg(From, RegNum); | 5350 From = copyToReg(From, RegNum); |
5259 } | 5351 } |
5260 return From; | 5352 return From; |
5261 } | 5353 } |
5262 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 5354 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5263 // Check if the variable is guaranteed a physical register. This can happen | 5355 // Check if the variable is guaranteed a physical register. This can happen |
5264 // either when the variable is pre-colored or when it is assigned infinite | 5356 // either when the variable is pre-colored or when it is assigned infinite |
5265 // weight. | 5357 // weight. |
5266 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5358 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
5267 // We need a new physical register for the operand if: | 5359 // We need a new physical register for the operand if: |
5268 // Mem is not allowed and Var isn't guaranteed a physical | 5360 // Mem is not allowed and Var isn't guaranteed a physical |
5269 // register, or | 5361 // register, or |
5270 // RegNum is required and Var->getRegNum() doesn't match. | 5362 // RegNum is required and Var->getRegNum() doesn't match. |
5271 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5363 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
5272 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5364 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5507 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 5599 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
5508 Immediate->setShouldBePooled(true); | 5600 Immediate->setShouldBePooled(true); |
5509 // if we have already assigned a phy register, we must come from | 5601 // if we have already assigned a phy register, we must come from |
5510 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 5602 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
5511 // assigned register as this assignment is the start of its use-def | 5603 // assigned register as this assignment is the start of its use-def |
5512 // chain. So we add RegNum argument here. | 5604 // chain. So we add RegNum argument here. |
5513 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 5605 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
5514 IceString Label; | 5606 IceString Label; |
5515 llvm::raw_string_ostream Label_stream(Label); | 5607 llvm::raw_string_ostream Label_stream(Label); |
5516 Immediate->emitPoolLabel(Label_stream, Ctx); | 5608 Immediate->emitPoolLabel(Label_stream, Ctx); |
5517 const RelocOffsetT Offset = 0; | 5609 constexpr RelocOffsetT Offset = 0; |
5518 const bool SuppressMangling = true; | 5610 constexpr bool SuppressMangling = true; |
5519 Constant *Symbol = | 5611 Constant *Symbol = |
5520 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 5612 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
5521 typename Traits::X86OperandMem *MemOperand = | 5613 typename Traits::X86OperandMem *MemOperand = |
5522 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, | 5614 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, |
5523 Symbol); | 5615 Symbol); |
5524 _mov(Reg, MemOperand); | 5616 _mov(Reg, MemOperand); |
5525 return Reg; | 5617 return Reg; |
5526 } | 5618 } |
5527 assert("Unsupported -randomize-pool-immediates option" && false); | 5619 assert("Unsupported -randomize-pool-immediates option" && false); |
5528 } | 5620 } |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5604 // phi lowering, we should not ask for new physical registers in | 5696 // phi lowering, we should not ask for new physical registers in |
5605 // general. However, if we do meet Memory Operand during phi lowering, | 5697 // general. However, if we do meet Memory Operand during phi lowering, |
5606 // we should not blind or pool the immediates for now. | 5698 // we should not blind or pool the immediates for now. |
5607 if (RegNum != Variable::NoRegister) | 5699 if (RegNum != Variable::NoRegister) |
5608 return MemOperand; | 5700 return MemOperand; |
5609 Variable *RegTemp = makeReg(IceType_i32); | 5701 Variable *RegTemp = makeReg(IceType_i32); |
5610 IceString Label; | 5702 IceString Label; |
5611 llvm::raw_string_ostream Label_stream(Label); | 5703 llvm::raw_string_ostream Label_stream(Label); |
5612 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 5704 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); |
5613 MemOperand->getOffset()->setShouldBePooled(true); | 5705 MemOperand->getOffset()->setShouldBePooled(true); |
5614 const RelocOffsetT SymOffset = 0; | 5706 constexpr RelocOffsetT SymOffset = 0; |
5615 bool SuppressMangling = true; | 5707 constexpr bool SuppressMangling = true; |
5616 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 5708 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
5617 SuppressMangling); | 5709 SuppressMangling); |
5618 typename Traits::X86OperandMem *SymbolOperand = | 5710 typename Traits::X86OperandMem *SymbolOperand = |
5619 Traits::X86OperandMem::create( | 5711 Traits::X86OperandMem::create( |
5620 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | 5712 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); |
5621 _mov(RegTemp, SymbolOperand); | 5713 _mov(RegTemp, SymbolOperand); |
5622 // If we have a base variable here, we should add the lea instruction | 5714 // If we have a base variable here, we should add the lea instruction |
5623 // to add the value of the base variable to RegTemp. If there is no | 5715 // to add the value of the base variable to RegTemp. If there is no |
5624 // base variable, we won't need this lea instruction. | 5716 // base variable, we won't need this lea instruction. |
5625 if (MemOperand->getBase()) { | 5717 if (MemOperand->getBase()) { |
(...skipping 15 matching lines...) Expand all Loading... |
5641 } | 5733 } |
5642 // the offset is not eligible for blinding or pooling, return the original | 5734 // the offset is not eligible for blinding or pooling, return the original |
5643 // mem operand | 5735 // mem operand |
5644 return MemOperand; | 5736 return MemOperand; |
5645 } | 5737 } |
5646 | 5738 |
5647 } // end of namespace X86Internal | 5739 } // end of namespace X86Internal |
5648 } // end of namespace Ice | 5740 } // end of namespace Ice |
5649 | 5741 |
5650 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5742 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |