OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
580 Node->getInsts().insert(I3, RMW); | 580 Node->getInsts().insert(I3, RMW); |
581 } | 581 } |
582 } | 582 } |
583 if (Func->isVerbose(IceV_RMW)) | 583 if (Func->isVerbose(IceV_RMW)) |
584 Func->getContext()->unlockStr(); | 584 Func->getContext()->unlockStr(); |
585 } | 585 } |
586 | 586 |
// Maps a memory-order operand to its integer value: when Opnd is a
// ConstantInteger32 the result is Integer->getValue(); for any other operand
// the result is Intrinsics::MemoryOrderInvalid.
// Review note: the only OLD -> NEW difference in this function is that the
// dyn_cast result is declared `auto *Integer` instead of `auto Integer`; the
// cast itself and both return statements are the same in the two columns.
587 // Converts a ConstantInteger32 operand into its constant value, or | 587 // Converts a ConstantInteger32 operand into its constant value, or |
588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
591 return Integer->getValue(); | 591 return Integer->getValue(); |
592 return Intrinsics::MemoryOrderInvalid; | 592 return Intrinsics::MemoryOrderInvalid; |
593 } | 593 } |
594 | 594 |
595 /// Determines whether the dest of a Load instruction can be folded into one of | 595 /// Determines whether the dest of a Load instruction can be folded into one of |
596 /// the src operands of a 2-operand instruction. This is true as long as the | 596 /// the src operands of a 2-operand instruction. This is true as long as the |
597 /// load dest matches exactly one of the binary instruction's src operands. | 597 /// load dest matches exactly one of the binary instruction's src operands. |
598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. | 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. |
599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
600 Operand *&Src0, Operand *&Src1) { | 600 Operand *&Src0, Operand *&Src1) { |
(...skipping 14 matching lines...) Expand all Loading... |
615 while (!Context.atEnd()) { | 615 while (!Context.atEnd()) { |
616 Variable *LoadDest = nullptr; | 616 Variable *LoadDest = nullptr; |
617 Operand *LoadSrc = nullptr; | 617 Operand *LoadSrc = nullptr; |
618 Inst *CurInst = Context.getCur(); | 618 Inst *CurInst = Context.getCur(); |
619 Inst *Next = Context.getNextInst(); | 619 Inst *Next = Context.getNextInst(); |
620 // Determine whether the current instruction is a Load instruction or | 620 // Determine whether the current instruction is a Load instruction or |
621 // equivalent. | 621 // equivalent. |
622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
623 // An InstLoad always qualifies. | 623 // An InstLoad always qualifies. |
624 LoadDest = Load->getDest(); | 624 LoadDest = Load->getDest(); |
625 const bool DoLegalize = false; | 625 constexpr bool DoLegalize = false; |
626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
627 LoadDest->getType(), DoLegalize); | 627 LoadDest->getType(), DoLegalize); |
628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory | 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory |
630 // ordering, and can be implemented in a single instruction (i.e., not | 630 // ordering, and can be implemented in a single instruction (i.e., not |
631 // i64 on x86-32). | 631 // i64 on x86-32). |
632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
633 if (ID == Intrinsics::AtomicLoad && | 633 if (ID == Intrinsics::AtomicLoad && |
634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && | 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
635 Intrinsics::isMemoryOrderValid( | 635 Intrinsics::isMemoryOrderValid( |
636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
637 LoadDest = Intrin->getDest(); | 637 LoadDest = Intrin->getDest(); |
638 const bool DoLegalize = false; | 638 constexpr bool DoLegalize = false; |
639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
640 DoLegalize); | 640 DoLegalize); |
641 } | 641 } |
642 } | 642 } |
643 // A Load instruction can be folded into the following instruction only | 643 // A Load instruction can be folded into the following instruction only |
644 // if the following instruction ends the Load's Dest variable's live | 644 // if the following instruction ends the Load's Dest variable's live |
645 // range. | 645 // range. |
646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { |
647 assert(LoadSrc); | 647 assert(LoadSrc); |
648 Inst *NewInst = nullptr; | 648 Inst *NewInst = nullptr; |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
726 // considered live upon function entry. Otherwise it's possible to get | 726 // considered live upon function entry. Otherwise it's possible to get |
727 // liveness validation errors for saving callee-save registers. | 727 // liveness validation errors for saving callee-save registers. |
728 Func->addImplicitArg(Reg); | 728 Func->addImplicitArg(Reg); |
729 // Don't bother tracking the live range of a named physical register. | 729 // Don't bother tracking the live range of a named physical register. |
730 Reg->setIgnoreLiveness(); | 730 Reg->setIgnoreLiveness(); |
731 } | 731 } |
732 return Reg; | 732 return Reg; |
733 } | 733 } |
734 | 734 |
// Returns the register name for RegNum by forwarding to Traits::getRegName.
// Review note: in the OLD column the Type argument (Ty) is passed through to
// Traits::getRegName; in the NEW column the Type parameter is left unnamed and
// only RegNum is forwarded. The signature's parameter types are the same in
// both columns, so existing two-argument call sites (e.g. emitVariable below)
// are unaffected by this change.
735 template <class Machine> | 735 template <class Machine> |
736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const { |
737 return Traits::getRegName(RegNum, Ty); | 737 return Traits::getRegName(RegNum); |
738 } | 738 } |
739 | 739 |
740 template <class Machine> | 740 template <class Machine> |
741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { | 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { |
742 if (!BuildDefs::dump()) | 742 if (!BuildDefs::dump()) |
743 return; | 743 return; |
744 Ostream &Str = Ctx->getStrEmit(); | 744 Ostream &Str = Ctx->getStrEmit(); |
745 if (Var->hasReg()) { | 745 if (Var->hasReg()) { |
746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); |
747 return; | 747 return; |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
790 if (Var->mustHaveReg()) { | 790 if (Var->mustHaveReg()) { |
791 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 791 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
792 } | 792 } |
793 int32_t Offset = Var->getStackOffset(); | 793 int32_t Offset = Var->getStackOffset(); |
794 int32_t BaseRegNum = Var->getBaseRegNum(); | 794 int32_t BaseRegNum = Var->getBaseRegNum(); |
795 if (Var->getBaseRegNum() == Variable::NoRegister) { | 795 if (Var->getBaseRegNum() == Variable::NoRegister) { |
796 BaseRegNum = getFrameOrStackReg(); | 796 BaseRegNum = getFrameOrStackReg(); |
797 if (!hasFramePointer()) | 797 if (!hasFramePointer()) |
798 Offset += getStackAdjustment(); | 798 Offset += getStackAdjustment(); |
799 } | 799 } |
800 return typename Traits::Address( | 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, |
801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset, | 801 AssemblerFixup::NoFixup); |
802 AssemblerFixup::NoFixup); | |
803 } | 802 } |
804 | 803 |
805 /// Helper function for addProlog(). | 804 /// Helper function for addProlog(). |
806 /// | 805 /// |
807 /// This assumes Arg is an argument passed on the stack. This sets the frame | 806 /// This assumes Arg is an argument passed on the stack. This sets the frame |
808 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
809 /// I64 arg that has been split into Lo and Hi components, it calls itself | 808 /// I64 arg that has been split into Lo and Hi components, it calls itself |
810 /// recursively on the components, taking care to handle Lo first because of the | 809 /// recursively on the components, taking care to handle Lo first because of the |
811 /// little-endian architecture. Lastly, this function generates an instruction | 810 /// little-endian architecture. Lastly, this function generates an instruction |
812 /// to copy Arg into its assigned register if applicable. | 811 /// to copy Arg into its assigned register if applicable. |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1041 Src1 /= 2; | 1040 Src1 /= 2; |
1042 } else { | 1041 } else { |
1043 return false; | 1042 return false; |
1044 } | 1043 } |
1045 } | 1044 } |
1046 // Lea optimization only works for i16 and i32 types, not i8. | 1045 // Lea optimization only works for i16 and i32 types, not i8. |
1047 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
1048 return false; | 1047 return false; |
1049 // Limit the number of lea/shl operations for a single multiply, to a | 1048 // Limit the number of lea/shl operations for a single multiply, to a |
1050 // somewhat arbitrary choice of 3. | 1049 // somewhat arbitrary choice of 3. |
1051 const uint32_t MaxOpsForOptimizedMul = 3; | 1050 constexpr uint32_t MaxOpsForOptimizedMul = 3; |
1052 if (CountOps > MaxOpsForOptimizedMul) | 1051 if (CountOps > MaxOpsForOptimizedMul) |
1053 return false; | 1052 return false; |
1054 _mov(T, Src0); | 1053 _mov(T, Src0); |
1055 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1054 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1056 for (uint32_t i = 0; i < Count9; ++i) { | 1055 for (uint32_t i = 0; i < Count9; ++i) { |
1057 const uint16_t Shift = 3; // log2(9-1) | 1056 constexpr uint16_t Shift = 3; // log2(9-1) |
1058 _lea(T, | 1057 _lea(T, |
1059 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1060 } | 1059 } |
1061 for (uint32_t i = 0; i < Count5; ++i) { | 1060 for (uint32_t i = 0; i < Count5; ++i) { |
1062 const uint16_t Shift = 2; // log2(5-1) | 1061 constexpr uint16_t Shift = 2; // log2(5-1) |
1063 _lea(T, | 1062 _lea(T, |
1064 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1065 } | 1064 } |
1066 for (uint32_t i = 0; i < Count3; ++i) { | 1065 for (uint32_t i = 0; i < Count3; ++i) { |
1067 const uint16_t Shift = 1; // log2(3-1) | 1066 constexpr uint16_t Shift = 1; // log2(3-1) |
1068 _lea(T, | 1067 _lea(T, |
1069 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1070 } | 1069 } |
1071 if (Count2) { | 1070 if (Count2) { |
1072 _shl(T, Ctx->getConstantInt(Ty, Count2)); | 1071 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
1073 } | 1072 } |
1074 if (Src1IsNegative) | 1073 if (Src1IsNegative) |
1075 _neg(T); | 1074 _neg(T); |
1076 _mov(Dest, T); | 1075 _mov(Dest, T); |
1077 return true; | 1076 return true; |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1209 } | 1208 } |
1210 } else { | 1209 } else { |
1211 // NON-CONSTANT CASES. | 1210 // NON-CONSTANT CASES. |
1212 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1211 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1213 typename Traits::Insts::Label *Label = | 1212 typename Traits::Insts::Label *Label = |
1214 Traits::Insts::Label::create(Func, this); | 1213 Traits::Insts::Label::create(Func, this); |
1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
1216 // t1:ecx = c.lo & 0xff | 1215 // t1:ecx = c.lo & 0xff |
1217 // t2 = b.lo | 1216 // t2 = b.lo |
1218 // t3 = b.hi | 1217 // t3 = b.hi |
1219 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1218 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1219 _mov(T_1, Src1Lo); |
1220 _mov(T_2, Src0Lo); | 1220 _mov(T_2, Src0Lo); |
1221 _mov(T_3, Src0Hi); | 1221 _mov(T_3, Src0Hi); |
1222 switch (Op) { | 1222 switch (Op) { |
1223 default: | 1223 default: |
1224 assert(0 && "non-shift op"); | 1224 assert(0 && "non-shift op"); |
1225 break; | 1225 break; |
1226 case InstArithmetic::Shl: { | 1226 case InstArithmetic::Shl: { |
1227 // a=b<<c ==> | 1227 // a=b<<c ==> |
1228 // t3 = shld t3, t2, t1 | 1228 // t3 = shld t3, t2, t1 |
1229 // t2 = shl t2, t1 | 1229 // t2 = shl t2, t1 |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1317 } | 1317 } |
1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
1319 // These x86-32 helper-call-involved instructions are lowered in this | 1319 // These x86-32 helper-call-involved instructions are lowered in this |
1320 // separate switch. This is because loOperand() and hiOperand() may insert | 1320 // separate switch. This is because loOperand() and hiOperand() may insert |
1321 // redundant instructions for constant blinding and pooling. Such redundant | 1321 // redundant instructions for constant blinding and pooling. Such redundant |
1322 // instructions will fail liveness analysis under -Om1 setting. And, | 1322 // instructions will fail liveness analysis under -Om1 setting. And, |
1323 // actually these arguments do not need to be processed with loOperand() | 1323 // actually these arguments do not need to be processed with loOperand() |
1324 // and hiOperand() to be used. | 1324 // and hiOperand() to be used. |
1325 switch (Inst->getOp()) { | 1325 switch (Inst->getOp()) { |
1326 case InstArithmetic::Udiv: { | 1326 case InstArithmetic::Udiv: { |
1327 const SizeT MaxSrcs = 2; | 1327 constexpr SizeT MaxSrcs = 2; |
1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
1329 Call->addArg(Inst->getSrc(0)); | 1329 Call->addArg(Inst->getSrc(0)); |
1330 Call->addArg(Inst->getSrc(1)); | 1330 Call->addArg(Inst->getSrc(1)); |
1331 lowerCall(Call); | 1331 lowerCall(Call); |
1332 return; | 1332 return; |
1333 } | 1333 } |
1334 case InstArithmetic::Sdiv: { | 1334 case InstArithmetic::Sdiv: { |
1335 const SizeT MaxSrcs = 2; | 1335 constexpr SizeT MaxSrcs = 2; |
1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | 1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); |
1337 Call->addArg(Inst->getSrc(0)); | 1337 Call->addArg(Inst->getSrc(0)); |
1338 Call->addArg(Inst->getSrc(1)); | 1338 Call->addArg(Inst->getSrc(1)); |
1339 lowerCall(Call); | 1339 lowerCall(Call); |
1340 return; | 1340 return; |
1341 } | 1341 } |
1342 case InstArithmetic::Urem: { | 1342 case InstArithmetic::Urem: { |
1343 const SizeT MaxSrcs = 2; | 1343 constexpr SizeT MaxSrcs = 2; |
1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | 1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); |
1345 Call->addArg(Inst->getSrc(0)); | 1345 Call->addArg(Inst->getSrc(0)); |
1346 Call->addArg(Inst->getSrc(1)); | 1346 Call->addArg(Inst->getSrc(1)); |
1347 lowerCall(Call); | 1347 lowerCall(Call); |
1348 return; | 1348 return; |
1349 } | 1349 } |
1350 case InstArithmetic::Srem: { | 1350 case InstArithmetic::Srem: { |
1351 const SizeT MaxSrcs = 2; | 1351 constexpr SizeT MaxSrcs = 2; |
1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | 1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); |
1353 Call->addArg(Inst->getSrc(0)); | 1353 Call->addArg(Inst->getSrc(0)); |
1354 Call->addArg(Inst->getSrc(1)); | 1354 Call->addArg(Inst->getSrc(1)); |
1355 lowerCall(Call); | 1355 lowerCall(Call); |
1356 return; | 1356 return; |
1357 } | 1357 } |
1358 default: | 1358 default: |
1359 break; | 1359 break; |
1360 } | 1360 } |
1361 | 1361 |
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1522 // pmuludq T1, Src1 | 1522 // pmuludq T1, Src1 |
1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
1524 // pmuludq T2, T3 | 1524 // pmuludq T2, T3 |
1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | 1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
1526 // shufps T1, T2, {0,2,0,2} | 1526 // shufps T1, T2, {0,2,0,2} |
1527 // pshufd T4, T1, {0,2,1,3} | 1527 // pshufd T4, T1, {0,2,1,3} |
1528 // movups Dest, T4 | 1528 // movups Dest, T4 |
1529 | 1529 |
1530 // Mask that directs pshufd to create a vector with entries | 1530 // Mask that directs pshufd to create a vector with entries |
1531 // Src[1, 0, 3, 0] | 1531 // Src[1, 0, 3, 0] |
1532 const unsigned Constant1030 = 0x31; | 1532 constexpr unsigned Constant1030 = 0x31; |
1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | 1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); |
1534 // Mask that directs shufps to create a vector with entries | 1534 // Mask that directs shufps to create a vector with entries |
1535 // Dest[0, 2], Src[0, 2] | 1535 // Dest[0, 2], Src[0, 2] |
1536 const unsigned Mask0202 = 0x88; | 1536 constexpr unsigned Mask0202 = 0x88; |
1537 // Mask that directs pshufd to create a vector with entries | 1537 // Mask that directs pshufd to create a vector with entries |
1538 // Src[0, 2, 1, 3] | 1538 // Src[0, 2, 1, 3] |
1539 const unsigned Mask0213 = 0xd8; | 1539 constexpr unsigned Mask0213 = 0xd8; |
1540 Variable *T1 = makeReg(IceType_v4i32); | 1540 Variable *T1 = makeReg(IceType_v4i32); |
1541 Variable *T2 = makeReg(IceType_v4i32); | 1541 Variable *T2 = makeReg(IceType_v4i32); |
1542 Variable *T3 = makeReg(IceType_v4i32); | 1542 Variable *T3 = makeReg(IceType_v4i32); |
1543 Variable *T4 = makeReg(IceType_v4i32); | 1543 Variable *T4 = makeReg(IceType_v4i32); |
1544 _movp(T1, Src0); | 1544 _movp(T1, Src0); |
1545 _pshufd(T2, Src0, Mask1030); | 1545 _pshufd(T2, Src0, Mask1030); |
1546 _pshufd(T3, Src1, Mask1030); | 1546 _pshufd(T3, Src1, Mask1030); |
1547 _pmuludq(T1, Src1); | 1547 _pmuludq(T1, Src1); |
1548 _pmuludq(T2, T3); | 1548 _pmuludq(T2, T3); |
1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1624 _mov(T, Src0); | 1624 _mov(T, Src0); |
1625 _sub(T, Src1); | 1625 _sub(T, Src1); |
1626 _mov(Dest, T); | 1626 _mov(Dest, T); |
1627 break; | 1627 break; |
1628 case InstArithmetic::Mul: | 1628 case InstArithmetic::Mul: |
1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1630 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1630 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
1631 return; | 1631 return; |
1632 } | 1632 } |
1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must | 1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
1634 // be in eax. | 1634 // be in al. |
1635 if (isByteSizedArithType(Dest->getType())) { | 1635 if (isByteSizedArithType(Dest->getType())) { |
1636 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1636 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1638 _imul(T, Src0 == Src1 ? T : Src1); | 1638 _imul(T, Src0 == Src1 ? T : Src1); |
1639 _mov(Dest, T); | 1639 _mov(Dest, T); |
1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1641 T = makeReg(Dest->getType()); | 1641 T = makeReg(Dest->getType()); |
1642 _imul_imm(T, Src0, ImmConst); | 1642 _imul_imm(T, Src0, ImmConst); |
1643 _mov(Dest, T); | 1643 _mov(Dest, T); |
1644 } else { | 1644 } else { |
1645 _mov(T, Src0); | 1645 _mov(T, Src0); |
1646 _imul(T, Src0 == Src1 ? T : Src1); | 1646 _imul(T, Src0 == Src1 ? T : Src1); |
1647 _mov(Dest, T); | 1647 _mov(Dest, T); |
1648 } | 1648 } |
1649 break; | 1649 break; |
1650 case InstArithmetic::Shl: | 1650 case InstArithmetic::Shl: |
1651 _mov(T, Src0); | 1651 _mov(T, Src0); |
1652 if (!llvm::isa<ConstantInteger32>(Src1)) | 1652 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1653 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1653 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1654 _mov(Cl, Src1); |
| 1655 Src1 = Cl; |
| 1656 } |
1654 _shl(T, Src1); | 1657 _shl(T, Src1); |
1655 _mov(Dest, T); | 1658 _mov(Dest, T); |
1656 break; | 1659 break; |
1657 case InstArithmetic::Lshr: | 1660 case InstArithmetic::Lshr: |
1658 _mov(T, Src0); | 1661 _mov(T, Src0); |
1659 if (!llvm::isa<ConstantInteger32>(Src1)) | 1662 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1660 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1663 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1664 _mov(Cl, Src1); |
| 1665 Src1 = Cl; |
| 1666 } |
1661 _shr(T, Src1); | 1667 _shr(T, Src1); |
1662 _mov(Dest, T); | 1668 _mov(Dest, T); |
1663 break; | 1669 break; |
1664 case InstArithmetic::Ashr: | 1670 case InstArithmetic::Ashr: |
1665 _mov(T, Src0); | 1671 _mov(T, Src0); |
1666 if (!llvm::isa<ConstantInteger32>(Src1)) | 1672 if (!llvm::isa<ConstantInteger32>(Src1)) { |
1667 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); | 1673 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); |
| 1674 _mov(Cl, Src1); |
| 1675 Src1 = Cl; |
| 1676 } |
1668 _sar(T, Src1); | 1677 _sar(T, Src1); |
1669 _mov(Dest, T); | 1678 _mov(Dest, T); |
1670 break; | 1679 break; |
1671 case InstArithmetic::Udiv: | 1680 case InstArithmetic::Udiv: |
1672 // div and idiv are the few arithmetic operators that do not allow | 1681 // div and idiv are the few arithmetic operators that do not allow |
1673 // immediates as the operand. | 1682 // immediates as the operand. |
1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1683 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1675 if (isByteSizedArithType(Dest->getType())) { | 1684 if (isByteSizedArithType(Dest->getType())) { |
1676 // For 8-bit unsigned division we need to zero-extend al into ah. A mov | 1685 // For 8-bit unsigned division we need to zero-extend al into ah. A mov |
1677 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 | 1686 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 |
1678 // assembler refuses to encode %ah (encoding %spl with a REX prefix | 1687 // assembler refuses to encode %ah (encoding %spl with a REX prefix |
1679 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah | 1688 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah |
1680 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and | 1689 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and |
1681 // %d[lh], which means the X86 target lowering (and the register | 1690 // %d[lh], which means the X86 target lowering (and the register |
1682 // allocator) would have to be aware of this restriction. For now, we | 1691 // allocator) would have to be aware of this restriction. For now, we |
1683 // simply zero %eax completely, and move the dividend into %al. | 1692 // simply zero %eax completely, and move the dividend into %al. |
1684 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1693 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1685 Context.insert(InstFakeDef::create(Func, T_eax)); | 1694 Context.insert(InstFakeDef::create(Func, T_eax)); |
1686 _xor(T_eax, T_eax); | 1695 _xor(T_eax, T_eax); |
1687 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1696 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1688 _div(T, Src1, T); | 1697 _div(T, Src1, T); |
1689 _mov(Dest, T); | 1698 _mov(Dest, T); |
1690 Context.insert(InstFakeUse::create(Func, T_eax)); | 1699 Context.insert(InstFakeUse::create(Func, T_eax)); |
1691 } else { | 1700 } else { |
1692 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1701 Type Ty = Dest->getType(); |
1693 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1702 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
1694 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1703 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1704 switch (Ty) { |
| 1705 default: |
| 1706 llvm_unreachable("Bad type for udiv"); |
| 1707 // fallthrough |
| 1708 case IceType_i32: |
| 1709 break; |
| 1710 case IceType_i16: |
| 1711 Eax = Traits::RegisterSet::Reg_ax; |
| 1712 Edx = Traits::RegisterSet::Reg_dx; |
| 1713 break; |
| 1714 } |
| 1715 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1716 _mov(T, Src0, Eax); |
| 1717 _mov(T_edx, Zero, Edx); |
1695 _div(T, Src1, T_edx); | 1718 _div(T, Src1, T_edx); |
1696 _mov(Dest, T); | 1719 _mov(Dest, T); |
1697 } | 1720 } |
1698 break; | 1721 break; |
1699 case InstArithmetic::Sdiv: | 1722 case InstArithmetic::Sdiv: |
1700 // TODO(stichnot): Enable this after doing better performance and cross | 1723 // TODO(stichnot): Enable this after doing better performance and cross |
1701 // testing. | 1724 // testing. |
1702 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1725 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1703 // Optimize division by constant power of 2, but not for Om1 or O0, just | 1726 // Optimize division by constant power of 2, but not for Om1 or O0, just |
1704 // to keep things simple there. | 1727 // to keep things simple there. |
(...skipping 21 matching lines...) Expand all Loading... |
1726 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1749 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
1727 _add(T, Src0); | 1750 _add(T, Src0); |
1728 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1751 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
1729 } | 1752 } |
1730 _mov(Dest, T); | 1753 _mov(Dest, T); |
1731 return; | 1754 return; |
1732 } | 1755 } |
1733 } | 1756 } |
1734 } | 1757 } |
1735 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1736 if (isByteSizedArithType(Dest->getType())) { | 1759 switch (Type Ty = Dest->getType()) { |
| 1760 default: |
| 1761 llvm_unreachable("Bad type for sdiv"); |
| 1762 // fallthrough |
| 1763 case IceType_i32: |
| 1764 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1737 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1765 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1738 _cbwdq(T, T); | 1766 break; |
1739 _idiv(T, Src1, T); | 1767 case IceType_i16: |
1740 _mov(Dest, T); | 1768 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
1741 } else { | 1769 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
1742 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1770 break; |
1743 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1771 case IceType_i8: |
1744 _cbwdq(T_edx, T); | 1772 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
1745 _idiv(T, Src1, T_edx); | 1773 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1746 _mov(Dest, T); | 1774 break; |
1747 } | 1775 } |
| 1776 _cbwdq(T_edx, T); |
| 1777 _idiv(T, Src1, T_edx); |
| 1778 _mov(Dest, T); |
1748 break; | 1779 break; |
1749 case InstArithmetic::Urem: | 1780 case InstArithmetic::Urem: |
1750 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1781 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1751 if (isByteSizedArithType(Dest->getType())) { | 1782 if (isByteSizedArithType(Dest->getType())) { |
1752 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1783 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
1753 Context.insert(InstFakeDef::create(Func, T_eax)); | 1784 Context.insert(InstFakeDef::create(Func, T_eax)); |
1754 _xor(T_eax, T_eax); | 1785 _xor(T_eax, T_eax); |
1755 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1786 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1756 _div(T, Src1, T); | 1787 _div(T, Src1, T); |
1757 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1788 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
1758 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1789 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
1759 // this ever becomes a problem we can introduce a pseudo rem instruction | 1790 // this ever becomes a problem we can introduce a pseudo rem instruction |
1760 // that returns the remainder in %al directly (and uses a mov for copying | 1791 // that returns the remainder in %al directly (and uses a mov for copying |
1761 // %ah to %al.) | 1792 // %ah to %al.) |
1762 static constexpr uint8_t AlSizeInBits = 8; | 1793 static constexpr uint8_t AlSizeInBits = 8; |
1763 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1794 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
1764 _mov(Dest, T); | 1795 _mov(Dest, T); |
1765 Context.insert(InstFakeUse::create(Func, T_eax)); | 1796 Context.insert(InstFakeUse::create(Func, T_eax)); |
1766 } else { | 1797 } else { |
1767 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1798 Type Ty = Dest->getType(); |
1768 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 1799 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1800 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1801 switch (Ty) { |
| 1802 default: |
| 1803 llvm_unreachable("Bad type for urem"); |
| 1804 // fallthrough |
| 1805 case IceType_i32: |
| 1806 break; |
| 1807 case IceType_i16: |
| 1808 Eax = Traits::RegisterSet::Reg_ax; |
| 1809 Edx = Traits::RegisterSet::Reg_dx; |
| 1810 break; |
| 1811 } |
| 1812 Constant *Zero = Ctx->getConstantZero(Ty); |
| 1813 T_edx = makeReg(Dest->getType(), Edx); |
1769 _mov(T_edx, Zero); | 1814 _mov(T_edx, Zero); |
1770 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1815 _mov(T, Src0, Eax); |
1771 _div(T_edx, Src1, T); | 1816 _div(T_edx, Src1, T); |
1772 _mov(Dest, T_edx); | 1817 _mov(Dest, T_edx); |
1773 } | 1818 } |
1774 break; | 1819 break; |
1775 case InstArithmetic::Srem: | 1820 case InstArithmetic::Srem: |
1776 // TODO(stichnot): Enable this after doing better performance and cross | 1821 // TODO(stichnot): Enable this after doing better performance and cross |
1777 // testing. | 1822 // testing. |
1778 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1823 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1779 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 1824 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
1780 // keep things simple there. | 1825 // keep things simple there. |
(...skipping 26 matching lines...) Expand all Loading... |
1807 _add(T, Src0); | 1852 _add(T, Src0); |
1808 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1853 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
1809 _sub(T, Src0); | 1854 _sub(T, Src0); |
1810 _neg(T); | 1855 _neg(T); |
1811 _mov(Dest, T); | 1856 _mov(Dest, T); |
1812 return; | 1857 return; |
1813 } | 1858 } |
1814 } | 1859 } |
1815 } | 1860 } |
1816 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1861 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1817 if (isByteSizedArithType(Dest->getType())) { | 1862 switch (Type Ty = Dest->getType()) { |
1818 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1863 default: |
1819 // T is %al. | 1864 llvm_unreachable("Bad type for srem"); |
1820 _cbwdq(T, T); | 1865 // fallthrough |
1821 _idiv(T, Src1, T); | 1866 case IceType_i32: |
1822 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1867 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1823 Context.insert(InstFakeDef::create(Func, T_eax)); | |
1824 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | |
1825 // mov %ah, %al because it would make x86-64 codegen more complicated. If | |
1826 // this ever becomes a problem we can introduce a pseudo rem instruction | |
1827 // that returns the remainder in %al directly (and uses a mov for copying | |
1828 // %ah to %al.) | |
1829 static constexpr uint8_t AlSizeInBits = 8; | |
1830 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | |
1831 _mov(Dest, T); | |
1832 Context.insert(InstFakeUse::create(Func, T_eax)); | |
1833 } else { | |
1834 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | |
1835 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1868 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1836 _cbwdq(T_edx, T); | 1869 _cbwdq(T_edx, T); |
1837 _idiv(T_edx, Src1, T); | 1870 _idiv(T_edx, Src1, T); |
1838 _mov(Dest, T_edx); | 1871 _mov(Dest, T_edx); |
| 1872 break; |
| 1873 case IceType_i16: |
| 1874 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1875 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1876 _cbwdq(T_edx, T); |
| 1877 _idiv(T_edx, Src1, T); |
| 1878 _mov(Dest, T_edx); |
| 1879 break; |
| 1880 case IceType_i8: |
| 1881 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1882 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). |
| 1883 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah); |
| 1884 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1885 _cbwdq(T_edx, T); |
| 1886 _idiv(T_edx, Src1, T); |
| 1887 static constexpr uint8_t AlSizeInBits = 8; |
| 1888 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits)); |
| 1889 _mov(Dest, T_edx); |
| 1890 break; |
1839 } | 1891 } |
1840 break; | 1892 break; |
1841 case InstArithmetic::Fadd: | 1893 case InstArithmetic::Fadd: |
1842 _mov(T, Src0); | 1894 _mov(T, Src0); |
1843 _addss(T, Src1); | 1895 _addss(T, Src1); |
1844 _mov(Dest, T); | 1896 _mov(Dest, T); |
1845 break; | 1897 break; |
1846 case InstArithmetic::Fsub: | 1898 case InstArithmetic::Fsub: |
1847 _mov(T, Src0); | 1899 _mov(T, Src0); |
1848 _subss(T, Src1); | 1900 _subss(T, Src1); |
1849 _mov(Dest, T); | 1901 _mov(Dest, T); |
1850 break; | 1902 break; |
1851 case InstArithmetic::Fmul: | 1903 case InstArithmetic::Fmul: |
1852 _mov(T, Src0); | 1904 _mov(T, Src0); |
1853 _mulss(T, Src0 == Src1 ? T : Src1); | 1905 _mulss(T, Src0 == Src1 ? T : Src1); |
1854 _mov(Dest, T); | 1906 _mov(Dest, T); |
1855 break; | 1907 break; |
1856 case InstArithmetic::Fdiv: | 1908 case InstArithmetic::Fdiv: |
1857 _mov(T, Src0); | 1909 _mov(T, Src0); |
1858 _divss(T, Src1); | 1910 _divss(T, Src1); |
1859 _mov(Dest, T); | 1911 _mov(Dest, T); |
1860 break; | 1912 break; |
1861 case InstArithmetic::Frem: { | 1913 case InstArithmetic::Frem: { |
1862 const SizeT MaxSrcs = 2; | 1914 constexpr SizeT MaxSrcs = 2; |
1863 Type Ty = Dest->getType(); | 1915 Type Ty = Dest->getType(); |
1864 InstCall *Call = makeHelperCall( | 1916 InstCall *Call = makeHelperCall( |
1865 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 1917 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
1866 Call->addArg(Src0); | 1918 Call->addArg(Src0); |
1867 Call->addArg(Src1); | 1919 Call->addArg(Src1); |
1868 return lowerCall(Call); | 1920 return lowerCall(Call); |
1869 } | 1921 } |
1870 } | 1922 } |
1871 } | 1923 } |
1872 | 1924 |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2107 if (isVectorType(Dest->getType())) { | 2159 if (isVectorType(Dest->getType())) { |
2108 assert(Dest->getType() == IceType_v4i32 && | 2160 assert(Dest->getType() == IceType_v4i32 && |
2109 Inst->getSrc(0)->getType() == IceType_v4f32); | 2161 Inst->getSrc(0)->getType() == IceType_v4f32); |
2110 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2162 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2111 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2163 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2112 Src0RM = legalizeToReg(Src0RM); | 2164 Src0RM = legalizeToReg(Src0RM); |
2113 Variable *T = makeReg(Dest->getType()); | 2165 Variable *T = makeReg(Dest->getType()); |
2114 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2166 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
2115 _movp(Dest, T); | 2167 _movp(Dest, T); |
2116 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2168 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
2117 const SizeT MaxSrcs = 1; | 2169 constexpr SizeT MaxSrcs = 1; |
2118 Type SrcType = Inst->getSrc(0)->getType(); | 2170 Type SrcType = Inst->getSrc(0)->getType(); |
2119 InstCall *Call = | 2171 InstCall *Call = |
2120 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2172 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
2121 : H_fptosi_f64_i64, | 2173 : H_fptosi_f64_i64, |
2122 Dest, MaxSrcs); | 2174 Dest, MaxSrcs); |
2123 Call->addArg(Inst->getSrc(0)); | 2175 Call->addArg(Inst->getSrc(0)); |
2124 lowerCall(Call); | 2176 lowerCall(Call); |
2125 } else { | 2177 } else { |
2126 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2178 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2127 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2179 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
(...skipping 10 matching lines...) Expand all Loading... |
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
2139 if (Dest->getType() == IceType_i1) | 2191 if (Dest->getType() == IceType_i1) |
2140 _and(T_2, Ctx->getConstantInt1(1)); | 2192 _and(T_2, Ctx->getConstantInt1(1)); |
2141 _mov(Dest, T_2); | 2193 _mov(Dest, T_2); |
2142 } | 2194 } |
2143 break; | 2195 break; |
2144 case InstCast::Fptoui: | 2196 case InstCast::Fptoui: |
2145 if (isVectorType(Dest->getType())) { | 2197 if (isVectorType(Dest->getType())) { |
2146 assert(Dest->getType() == IceType_v4i32 && | 2198 assert(Dest->getType() == IceType_v4i32 && |
2147 Inst->getSrc(0)->getType() == IceType_v4f32); | 2199 Inst->getSrc(0)->getType() == IceType_v4f32); |
2148 const SizeT MaxSrcs = 1; | 2200 constexpr SizeT MaxSrcs = 1; |
2149 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
2150 Call->addArg(Inst->getSrc(0)); | 2202 Call->addArg(Inst->getSrc(0)); |
2151 lowerCall(Call); | 2203 lowerCall(Call); |
2152 } else if (Dest->getType() == IceType_i64 || | 2204 } else if (Dest->getType() == IceType_i64 || |
2153 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2205 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
2154 // Use a helper for both x86-32 and x86-64. | 2206 // Use a helper for both x86-32 and x86-64. |
2155 const SizeT MaxSrcs = 1; | 2207 constexpr SizeT MaxSrcs = 1; |
2156 Type DestType = Dest->getType(); | 2208 Type DestType = Dest->getType(); |
2157 Type SrcType = Inst->getSrc(0)->getType(); | 2209 Type SrcType = Inst->getSrc(0)->getType(); |
2158 IceString TargetString; | 2210 IceString TargetString; |
2159 if (Traits::Is64Bit) { | 2211 if (Traits::Is64Bit) { |
2160 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2212 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
2161 : H_fptoui_f64_i64; | 2213 : H_fptoui_f64_i64; |
2162 } else if (isInt32Asserting32Or64(DestType)) { | 2214 } else if (isInt32Asserting32Or64(DestType)) { |
2163 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2215 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
2164 : H_fptoui_f64_i32; | 2216 : H_fptoui_f64_i32; |
2165 } else { | 2217 } else { |
(...skipping 28 matching lines...) Expand all Loading... |
2194 assert(Dest->getType() == IceType_v4f32 && | 2246 assert(Dest->getType() == IceType_v4f32 && |
2195 Inst->getSrc(0)->getType() == IceType_v4i32); | 2247 Inst->getSrc(0)->getType() == IceType_v4i32); |
2196 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2248 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2197 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2249 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2198 Src0RM = legalizeToReg(Src0RM); | 2250 Src0RM = legalizeToReg(Src0RM); |
2199 Variable *T = makeReg(Dest->getType()); | 2251 Variable *T = makeReg(Dest->getType()); |
2200 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2252 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
2201 _movp(Dest, T); | 2253 _movp(Dest, T); |
2202 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2254 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
2203 // Use a helper for x86-32. | 2255 // Use a helper for x86-32. |
2204 const SizeT MaxSrcs = 1; | 2256 constexpr SizeT MaxSrcs = 1; |
2205 Type DestType = Dest->getType(); | 2257 Type DestType = Dest->getType(); |
2206 InstCall *Call = | 2258 InstCall *Call = |
2207 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2259 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
2208 : H_sitofp_i64_f64, | 2260 : H_sitofp_i64_f64, |
2209 Dest, MaxSrcs); | 2261 Dest, MaxSrcs); |
2210 // TODO: Call the correct compiler-rt helper function. | 2262 // TODO: Call the correct compiler-rt helper function. |
2211 Call->addArg(Inst->getSrc(0)); | 2263 Call->addArg(Inst->getSrc(0)); |
2212 lowerCall(Call); | 2264 lowerCall(Call); |
2213 return; | 2265 return; |
2214 } else { | 2266 } else { |
(...skipping 14 matching lines...) Expand all Loading... |
2229 _movsx(T_1, Src0RM); | 2281 _movsx(T_1, Src0RM); |
2230 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2282 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
2231 _mov(Dest, T_2); | 2283 _mov(Dest, T_2); |
2232 } | 2284 } |
2233 break; | 2285 break; |
2234 case InstCast::Uitofp: { | 2286 case InstCast::Uitofp: { |
2235 Operand *Src0 = Inst->getSrc(0); | 2287 Operand *Src0 = Inst->getSrc(0); |
2236 if (isVectorType(Src0->getType())) { | 2288 if (isVectorType(Src0->getType())) { |
2237 assert(Dest->getType() == IceType_v4f32 && | 2289 assert(Dest->getType() == IceType_v4f32 && |
2238 Src0->getType() == IceType_v4i32); | 2290 Src0->getType() == IceType_v4i32); |
2239 const SizeT MaxSrcs = 1; | 2291 constexpr SizeT MaxSrcs = 1; |
2240 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2292 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
2241 Call->addArg(Src0); | 2293 Call->addArg(Src0); |
2242 lowerCall(Call); | 2294 lowerCall(Call); |
2243 } else if (Src0->getType() == IceType_i64 || | 2295 } else if (Src0->getType() == IceType_i64 || |
2244 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2296 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
2245 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 2297 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
2246 // x86-32. | 2298 // x86-32. |
2247 const SizeT MaxSrcs = 1; | 2299 constexpr SizeT MaxSrcs = 1; |
2248 Type DestType = Dest->getType(); | 2300 Type DestType = Dest->getType(); |
2249 IceString TargetString; | 2301 IceString TargetString; |
2250 if (isInt32Asserting32Or64(Src0->getType())) { | 2302 if (isInt32Asserting32Or64(Src0->getType())) { |
2251 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2303 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
2252 : H_uitofp_i32_f64; | 2304 : H_uitofp_i32_f64; |
2253 } else { | 2305 } else { |
2254 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2306 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
2255 : H_uitofp_i64_f64; | 2307 : H_uitofp_i64_f64; |
2256 } | 2308 } |
2257 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2309 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2453 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2505 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
2454 ConstantInteger32 *ElementIndex = | 2506 ConstantInteger32 *ElementIndex = |
2455 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | 2507 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); |
2456 // Only constant indices are allowed in PNaCl IR. | 2508 // Only constant indices are allowed in PNaCl IR. |
2457 assert(ElementIndex); | 2509 assert(ElementIndex); |
2458 | 2510 |
2459 unsigned Index = ElementIndex->getValue(); | 2511 unsigned Index = ElementIndex->getValue(); |
2460 Type Ty = SourceVectNotLegalized->getType(); | 2512 Type Ty = SourceVectNotLegalized->getType(); |
2461 Type ElementTy = typeElementType(Ty); | 2513 Type ElementTy = typeElementType(Ty); |
2462 Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 2514 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
2463 Variable *ExtractedElementR = makeReg(InVectorElementTy); | |
2464 | 2515 |
2465 // TODO(wala): Determine the best lowering sequences for each type. | 2516 // TODO(wala): Determine the best lowering sequences for each type. |
2466 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2517 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
2467 InstructionSet >= Traits::SSE4_1; | 2518 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32); |
2468 if (CanUsePextr && Ty != IceType_v4f32) { | 2519 Variable *ExtractedElementR = |
2469 // Use pextrb, pextrw, or pextrd. | 2520 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy); |
| 2521 if (CanUsePextr) { |
| 2522 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper |
| 2523 // bits of the destination register, so we represent this by always |
| 2524 // extracting into an i32 register. The _mov into Dest below will do |
| 2525 // truncation as necessary. |
2470 Constant *Mask = Ctx->getConstantInt32(Index); | 2526 Constant *Mask = Ctx->getConstantInt32(Index); |
2471 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); | 2527 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); |
2472 _pextr(ExtractedElementR, SourceVectR, Mask); | 2528 _pextr(ExtractedElementR, SourceVectR, Mask); |
2473 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2529 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2474 // Use pshufd and movd/movss. | 2530 // Use pshufd and movd/movss. |
2475 Variable *T = nullptr; | 2531 Variable *T = nullptr; |
2476 if (Index) { | 2532 if (Index) { |
2477 // The shuffle only needs to occur if the element to be extracted is not | 2533 // The shuffle only needs to occur if the element to be extracted is not |
2478 // at the lowest index. | 2534 // at the lowest index. |
2479 Constant *Mask = Ctx->getConstantInt32(Index); | 2535 Constant *Mask = Ctx->getConstantInt32(Index); |
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2976 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3032 // Use insertps, pinsrb, pinsrw, or pinsrd. |
2977 Operand *ElementRM = | 3033 Operand *ElementRM = |
2978 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3034 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
2979 Operand *SourceVectRM = | 3035 Operand *SourceVectRM = |
2980 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3036 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2981 Variable *T = makeReg(Ty); | 3037 Variable *T = makeReg(Ty); |
2982 _movp(T, SourceVectRM); | 3038 _movp(T, SourceVectRM); |
2983 if (Ty == IceType_v4f32) | 3039 if (Ty == IceType_v4f32) |
2984 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3040 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
2985 else | 3041 else |
| 3042 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source |
| 3043 // operand is a register, it must be a full r32 register like eax, and not |
| 3044 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates |
| 3045 // for the use of r16 and r8 by converting them through getBaseReg(), |
| 3046 // while emitIAS() validates that the original and base register encodings |
| 3047 // are the same. But for an "interior" register like ah, it should |
| 3048 // probably be copied into an r32 via movzx so that the types work out. |
2986 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3049 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
2987 _movp(Inst->getDest(), T); | 3050 _movp(Inst->getDest(), T); |
2988 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 3051 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2989 // Use shufps or movss. | 3052 // Use shufps or movss. |
2990 Variable *ElementR = nullptr; | 3053 Variable *ElementR = nullptr; |
2991 Operand *SourceVectRM = | 3054 Operand *SourceVectRM = |
2992 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3055 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2993 | 3056 |
2994 if (InVectorElementTy == IceType_f32) { | 3057 if (InVectorElementTy == IceType_f32) { |
2995 // ElementR will be in an XMM register since it is floating point. | 3058 // ElementR will be in an XMM register since it is floating point. |
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3310 // well-defined value. | 3373 // well-defined value. |
3311 Operand *Val = legalize(Instr->getArg(0)); | 3374 Operand *Val = legalize(Instr->getArg(0)); |
3312 Operand *FirstVal; | 3375 Operand *FirstVal; |
3313 Operand *SecondVal = nullptr; | 3376 Operand *SecondVal = nullptr; |
3314 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3377 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3315 FirstVal = loOperand(Val); | 3378 FirstVal = loOperand(Val); |
3316 SecondVal = hiOperand(Val); | 3379 SecondVal = hiOperand(Val); |
3317 } else { | 3380 } else { |
3318 FirstVal = Val; | 3381 FirstVal = Val; |
3319 } | 3382 } |
3320 const bool IsCttz = false; | 3383 constexpr bool IsCttz = false; |
3321 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3384 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3322 SecondVal); | 3385 SecondVal); |
3323 return; | 3386 return; |
3324 } | 3387 } |
3325 case Intrinsics::Cttz: { | 3388 case Intrinsics::Cttz: { |
3326 // The "is zero undef" parameter is ignored and we always return a | 3389 // The "is zero undef" parameter is ignored and we always return a |
3327 // well-defined value. | 3390 // well-defined value. |
3328 Operand *Val = legalize(Instr->getArg(0)); | 3391 Operand *Val = legalize(Instr->getArg(0)); |
3329 Operand *FirstVal; | 3392 Operand *FirstVal; |
3330 Operand *SecondVal = nullptr; | 3393 Operand *SecondVal = nullptr; |
3331 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3394 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3332 FirstVal = hiOperand(Val); | 3395 FirstVal = hiOperand(Val); |
3333 SecondVal = loOperand(Val); | 3396 SecondVal = loOperand(Val); |
3334 } else { | 3397 } else { |
3335 FirstVal = Val; | 3398 FirstVal = Val; |
3336 } | 3399 } |
3337 const bool IsCttz = true; | 3400 constexpr bool IsCttz = true; |
3338 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3401 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3339 SecondVal); | 3402 SecondVal); |
3340 return; | 3403 return; |
3341 } | 3404 } |
3342 case Intrinsics::Fabs: { | 3405 case Intrinsics::Fabs: { |
3343 Operand *Src = legalize(Instr->getArg(0)); | 3406 Operand *Src = legalize(Instr->getArg(0)); |
3344 Type Ty = Src->getType(); | 3407 Type Ty = Src->getType(); |
3345 Variable *Dest = Instr->getDest(); | 3408 Variable *Dest = Instr->getDest(); |
3346 Variable *T = makeVectorOfFabsMask(Ty); | 3409 Variable *T = makeVectorOfFabsMask(Ty); |
3347 // The pand instruction operates on an m128 memory operand, so if Src is an | 3410 // The pand instruction operates on an m128 memory operand, so if Src is an |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3425 Func->setError("Should not be lowering UnknownIntrinsic"); | 3488 Func->setError("Should not be lowering UnknownIntrinsic"); |
3426 return; | 3489 return; |
3427 } | 3490 } |
3428 return; | 3491 return; |
3429 } | 3492 } |
3430 | 3493 |
// lowerAtomicCmpxchg -- side-by-side review listing (left: old revision,
// right: new revision).
// Emits the lowering for an atomic compare-and-exchange on the memory named by
// Ptr. Expected is first copied into the eax-family register (edx:eax for the
// split i64 case), Desired is placed in another register (ecx:ebx for i64), a
// locked cmpxchg/cmpxchg8b is emitted, and the eax-family register contents are
// then copied into DestPrev.
3431 template <class Machine> | 3494 template <class Machine> |
3432 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3495 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
3433 Operand *Ptr, Operand *Expected, | 3496 Operand *Ptr, Operand *Expected, |
3434 Operand *Desired) { | 3497 Operand *Desired) { |
// New revision caches Expected's type in Ty; the old revision re-queried it at
// every use.
3435 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { | 3498 Type Ty = Expected->getType(); |
| 3499 if (!Traits::Is64Bit && Ty == IceType_i64) { |
// i64 on a non-64-bit target: work on the lo/hi halves through four pre-colored
// i32 registers and a single locked cmpxchg8b, then return early.
3436 // Reserve the pre-colored registers first, before adding any more | 3500 // Reserve the pre-colored registers first, before adding any more |
3437 // infinite-weight variables from formMemoryOperand's legalization. | 3501 // infinite-weight variables from formMemoryOperand's legalization. |
3438 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3502 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
3439 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3503 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
3440 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3504 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
3441 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3505 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
// Expected goes to edx:eax (hi:lo), Desired goes to ecx:ebx (hi:lo).
3442 _mov(T_eax, loOperand(Expected)); | 3506 _mov(T_eax, loOperand(Expected)); |
3443 _mov(T_edx, hiOperand(Expected)); | 3507 _mov(T_edx, hiOperand(Expected)); |
3444 _mov(T_ebx, loOperand(Desired)); | 3508 _mov(T_ebx, loOperand(Desired)); |
3445 _mov(T_ecx, hiOperand(Desired)); | 3509 _mov(T_ecx, hiOperand(Desired)); |
3446 typename Traits::X86OperandMem *Addr = | 3510 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3447 formMemoryOperand(Ptr, Expected->getType()); | 3511 constexpr bool Locked = true; |
3448 const bool Locked = true; | |
3449 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3512 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
// Copy the edx:eax pair into the two halves of DestPrev.
3450 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3513 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3451 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3514 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3452 _mov(DestLo, T_eax); | 3515 _mov(DestLo, T_eax); |
3453 _mov(DestHi, T_edx); | 3516 _mov(DestHi, T_edx); |
3454 return; | 3517 return; |
3455 } | 3518 } |
// Remaining types. The old revision always pre-colored T_eax to Reg_eax,
// whatever the operand type. The new revision selects Reg_eax, Reg_ax or Reg_al
// to match Ty (i32, i16, i8). The default case is marked unreachable and is
// written to fall through to the i32 choice, so Eax is assigned on every path
// that reaches makeReg.
3456 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); | 3519 int32_t Eax; |
| 3520 switch (Ty) { |
| 3521 default: |
| 3522 llvm_unreachable("Bad type for cmpxchg"); |
| 3523 // fallthrough |
| 3524 case IceType_i32: |
| 3525 Eax = Traits::RegisterSet::Reg_eax; |
| 3526 break; |
| 3527 case IceType_i16: |
| 3528 Eax = Traits::RegisterSet::Reg_ax; |
| 3529 break; |
| 3530 case IceType_i8: |
| 3531 Eax = Traits::RegisterSet::Reg_al; |
| 3532 break; |
| 3533 } |
| 3534 Variable *T_eax = makeReg(Ty, Eax); |
// Stage Expected in the selected register, force Desired into a register, emit
// the locked cmpxchg, then copy the selected register into DestPrev.
3457 _mov(T_eax, Expected); | 3535 _mov(T_eax, Expected); |
3458 typename Traits::X86OperandMem *Addr = | 3536 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3459 formMemoryOperand(Ptr, Expected->getType()); | |
3460 Variable *DesiredReg = legalizeToReg(Desired); | 3537 Variable *DesiredReg = legalizeToReg(Desired); |
3461 const bool Locked = true; | 3538 constexpr bool Locked = true; |
3462 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3539 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
3463 _mov(DestPrev, T_eax); | 3540 _mov(DestPrev, T_eax); |
3464 } | 3541 } |
3465 | 3542 |
3466 template <class Machine> | 3543 template <class Machine> |
3467 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3544 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
3468 Operand *PtrToMem, | 3545 Operand *PtrToMem, |
3469 Operand *Expected, | 3546 Operand *Expected, |
3470 Operand *Desired) { | 3547 Operand *Desired) { |
3471 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 3548 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3553 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3630 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3554 // All the fall-through paths must set this to true, but use this | 3631 // All the fall-through paths must set this to true, but use this |
3555 // for asserting. | 3632 // for asserting. |
3556 NeedsCmpxchg = true; | 3633 NeedsCmpxchg = true; |
3557 Op_Lo = &TargetX86Base<Machine>::_add; | 3634 Op_Lo = &TargetX86Base<Machine>::_add; |
3558 Op_Hi = &TargetX86Base<Machine>::_adc; | 3635 Op_Hi = &TargetX86Base<Machine>::_adc; |
3559 break; | 3636 break; |
3560 } | 3637 } |
3561 typename Traits::X86OperandMem *Addr = | 3638 typename Traits::X86OperandMem *Addr = |
3562 formMemoryOperand(Ptr, Dest->getType()); | 3639 formMemoryOperand(Ptr, Dest->getType()); |
3563 const bool Locked = true; | 3640 constexpr bool Locked = true; |
3564 Variable *T = nullptr; | 3641 Variable *T = nullptr; |
3565 _mov(T, Val); | 3642 _mov(T, Val); |
3566 _xadd(Addr, T, Locked); | 3643 _xadd(Addr, T, Locked); |
3567 _mov(Dest, T); | 3644 _mov(Dest, T); |
3568 return; | 3645 return; |
3569 } | 3646 } |
3570 case Intrinsics::AtomicSub: { | 3647 case Intrinsics::AtomicSub: { |
3571 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 3648 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
3572 NeedsCmpxchg = true; | 3649 NeedsCmpxchg = true; |
3573 Op_Lo = &TargetX86Base<Machine>::_sub; | 3650 Op_Lo = &TargetX86Base<Machine>::_sub; |
3574 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3651 Op_Hi = &TargetX86Base<Machine>::_sbb; |
3575 break; | 3652 break; |
3576 } | 3653 } |
3577 typename Traits::X86OperandMem *Addr = | 3654 typename Traits::X86OperandMem *Addr = |
3578 formMemoryOperand(Ptr, Dest->getType()); | 3655 formMemoryOperand(Ptr, Dest->getType()); |
3579 const bool Locked = true; | 3656 constexpr bool Locked = true; |
3580 Variable *T = nullptr; | 3657 Variable *T = nullptr; |
3581 _mov(T, Val); | 3658 _mov(T, Val); |
3582 _neg(T); | 3659 _neg(T); |
3583 _xadd(Addr, T, Locked); | 3660 _xadd(Addr, T, Locked); |
3584 _mov(Dest, T); | 3661 _mov(Dest, T); |
3585 return; | 3662 return; |
3586 } | 3663 } |
3587 case Intrinsics::AtomicOr: | 3664 case Intrinsics::AtomicOr: |
3588 // TODO(jvoung): If Dest is null or dead, then some of these | 3665 // TODO(jvoung): If Dest is null or dead, then some of these |
3589 // operations do not need an "exchange", but just a locked op. | 3666 // operations do not need an "exchange", but just a locked op. |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3677 _mov(T_ecx, T_edx); | 3754 _mov(T_ecx, T_edx); |
3678 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3755 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
3679 } else { | 3756 } else { |
3680 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3757 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
3681 // It just needs the Val loaded into ebx and ecx. | 3758 // It just needs the Val loaded into ebx and ecx. |
3682 // That can also be done before the loop. | 3759 // That can also be done before the loop. |
3683 _mov(T_ebx, loOperand(Val)); | 3760 _mov(T_ebx, loOperand(Val)); |
3684 _mov(T_ecx, hiOperand(Val)); | 3761 _mov(T_ecx, hiOperand(Val)); |
3685 Context.insert(Label); | 3762 Context.insert(Label); |
3686 } | 3763 } |
3687 const bool Locked = true; | 3764 constexpr bool Locked = true; |
3688 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3765 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3689 _br(Traits::Cond::Br_ne, Label); | 3766 _br(Traits::Cond::Br_ne, Label); |
3690 if (!IsXchg8b) { | 3767 if (!IsXchg8b) { |
3691 // If Val is a variable, model the extended live range of Val through | 3768 // If Val is a variable, model the extended live range of Val through |
3692 // the end of the loop, since it will be re-used by the loop. | 3769 // the end of the loop, since it will be re-used by the loop. |
3693 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3770 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3694 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3771 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
3695 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3772 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
3696 Context.insert(InstFakeUse::create(Func, ValLo)); | 3773 Context.insert(InstFakeUse::create(Func, ValLo)); |
3697 Context.insert(InstFakeUse::create(Func, ValHi)); | 3774 Context.insert(InstFakeUse::create(Func, ValHi)); |
3698 } | 3775 } |
3699 } else { | 3776 } else { |
3700 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3777 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
3701 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3778 Context.insert(InstFakeUse::create(Func, T_ebx)); |
3702 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3779 Context.insert(InstFakeUse::create(Func, T_ecx)); |
3703 } | 3780 } |
3704 // The address base (if any) is also reused in the loop. | 3781 // The address base (if any) is also reused in the loop. |
3705 if (Variable *Base = Addr->getBase()) | 3782 if (Variable *Base = Addr->getBase()) |
3706 Context.insert(InstFakeUse::create(Func, Base)); | 3783 Context.insert(InstFakeUse::create(Func, Base)); |
3707 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3784 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3708 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3785 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3709 _mov(DestLo, T_eax); | 3786 _mov(DestLo, T_eax); |
3710 _mov(DestHi, T_edx); | 3787 _mov(DestHi, T_edx); |
3711 return; | 3788 return; |
3712 } | 3789 } |
3713 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3790 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3714 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); | 3791 int32_t Eax; |
| 3792 switch (Ty) { |
| 3793 default: |
| 3794 llvm_unreachable("Bad type for atomicRMW"); |
| 3795 // fallthrough |
| 3796 case IceType_i32: |
| 3797 Eax = Traits::RegisterSet::Reg_eax; |
| 3798 break; |
| 3799 case IceType_i16: |
| 3800 Eax = Traits::RegisterSet::Reg_ax; |
| 3801 break; |
| 3802 case IceType_i8: |
| 3803 Eax = Traits::RegisterSet::Reg_al; |
| 3804 break; |
| 3805 } |
| 3806 Variable *T_eax = makeReg(Ty, Eax); |
3715 _mov(T_eax, Addr); | 3807 _mov(T_eax, Addr); |
3716 typename Traits::Insts::Label *Label = | 3808 typename Traits::Insts::Label *Label = |
3717 Traits::Insts::Label::create(Func, this); | 3809 Traits::Insts::Label::create(Func, this); |
3718 Context.insert(Label); | 3810 Context.insert(Label); |
3719 // We want to pick a different register for T than Eax, so don't use | 3811 // We want to pick a different register for T than Eax, so don't use |
3720 // _mov(T == nullptr, T_eax). | 3812 // _mov(T == nullptr, T_eax). |
3721 Variable *T = makeReg(Ty); | 3813 Variable *T = makeReg(Ty); |
3722 _mov(T, T_eax); | 3814 _mov(T, T_eax); |
3723 (this->*Op_Lo)(T, Val); | 3815 (this->*Op_Lo)(T, Val); |
3724 const bool Locked = true; | 3816 constexpr bool Locked = true; |
3725 _cmpxchg(Addr, T_eax, T, Locked); | 3817 _cmpxchg(Addr, T_eax, T, Locked); |
3726 _br(Traits::Cond::Br_ne, Label); | 3818 _br(Traits::Cond::Br_ne, Label); |
3727 // If Val is a variable, model the extended live range of Val through | 3819 // If Val is a variable, model the extended live range of Val through |
3728 // the end of the loop, since it will be re-used by the loop. | 3820 // the end of the loop, since it will be re-used by the loop. |
3729 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3821 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3730 Context.insert(InstFakeUse::create(Func, ValVar)); | 3822 Context.insert(InstFakeUse::create(Func, ValVar)); |
3731 } | 3823 } |
3732 // The address base (if any) is also reused in the loop. | 3824 // The address base (if any) is also reused in the loop. |
3733 if (Variable *Base = Addr->getBase()) | 3825 if (Variable *Base = Addr->getBase()) |
3734 Context.insert(InstFakeUse::create(Func, Base)); | 3826 Context.insert(InstFakeUse::create(Func, Base)); |
(...skipping 1476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5211 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 5303 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
5212 Ty == IceType_v16i8); | 5304 Ty == IceType_v16i8); |
5213 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 5305 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
5214 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 5306 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
5215 SizeT Shift = | 5307 SizeT Shift = |
5216 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; | 5308 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; |
5217 _psll(Reg, Ctx->getConstantInt8(Shift)); | 5309 _psll(Reg, Ctx->getConstantInt8(Shift)); |
5218 return Reg; | 5310 return Reg; |
5219 } else { | 5311 } else { |
5220 // SSE has no left shift operation for vectors of 8 bit integers. | 5312 // SSE has no left shift operation for vectors of 8 bit integers. |
5221 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 5313 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
5222 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 5314 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
5223 Variable *Reg = makeReg(Ty, RegNum); | 5315 Variable *Reg = makeReg(Ty, RegNum); |
5224 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 5316 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
5225 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 5317 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
5226 return Reg; | 5318 return Reg; |
5227 } | 5319 } |
5228 } | 5320 } |
5229 | 5321 |
5230 /// Construct a mask in a register that can be and'ed with a floating-point | 5322 /// Construct a mask in a register that can be and'ed with a floating-point |
5231 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 | 5323 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
(...skipping 13 matching lines...) Expand all Loading... |
5245 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5337 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
5246 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 5338 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
5247 uint32_t Offset) { | 5339 uint32_t Offset) { |
5248 // Ensure that Slot is a stack slot. | 5340 // Ensure that Slot is a stack slot. |
5249 assert(Slot->mustNotHaveReg()); | 5341 assert(Slot->mustNotHaveReg()); |
5250 assert(Slot->getRegNum() == Variable::NoRegister); | 5342 assert(Slot->getRegNum() == Variable::NoRegister); |
5251 // Compute the location of Slot in memory. | 5343 // Compute the location of Slot in memory. |
5252 // TODO(wala,stichnot): lea should not | 5344 // TODO(wala,stichnot): lea should not |
5253 // be required. The address of the stack slot is known at compile time | 5345 // be required. The address of the stack slot is known at compile time |
5254 // (although not until after addProlog()). | 5346 // (although not until after addProlog()). |
5255 const Type PointerType = IceType_i32; | 5347 constexpr Type PointerType = IceType_i32; |
5256 Variable *Loc = makeReg(PointerType); | 5348 Variable *Loc = makeReg(PointerType); |
5257 _lea(Loc, Slot); | 5349 _lea(Loc, Slot); |
5258 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5350 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
5259 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5351 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
5260 } | 5352 } |
5261 | 5353 |
5262 /// Helper for legalize() to emit the right code to lower an operand to a | 5354 /// Helper for legalize() to emit the right code to lower an operand to a |
5263 /// register of the appropriate type. | 5355 /// register of the appropriate type. |
5264 template <class Machine> | 5356 template <class Machine> |
5265 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5357 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5298 if (Subst->mustHaveReg() && !Subst->hasReg()) { | 5390 if (Subst->mustHaveReg() && !Subst->hasReg()) { |
5299 // At this point we know the substitution will have a register. | 5391 // At this point we know the substitution will have a register. |
5300 if (From->getType() == Subst->getType()) { | 5392 if (From->getType() == Subst->getType()) { |
5301 // At this point we know the substitution's register is compatible. | 5393 // At this point we know the substitution's register is compatible. |
5302 return Subst; | 5394 return Subst; |
5303 } | 5395 } |
5304 } | 5396 } |
5305 } | 5397 } |
5306 } | 5398 } |
5307 | 5399 |
5308 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 5400 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
5309 // Before doing anything with a Mem operand, we need to ensure that the | 5401 // Before doing anything with a Mem operand, we need to ensure that the |
5310 // Base and Index components are in physical registers. | 5402 // Base and Index components are in physical registers. |
5311 Variable *Base = Mem->getBase(); | 5403 Variable *Base = Mem->getBase(); |
5312 Variable *Index = Mem->getIndex(); | 5404 Variable *Index = Mem->getIndex(); |
5313 Variable *RegBase = nullptr; | 5405 Variable *RegBase = nullptr; |
5314 Variable *RegIndex = nullptr; | 5406 Variable *RegIndex = nullptr; |
5315 if (Base) { | 5407 if (Base) { |
5316 RegBase = legalizeToReg(Base); | 5408 RegBase = legalizeToReg(Base); |
5317 } | 5409 } |
5318 if (Index) { | 5410 if (Index) { |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5376 // Immediate specifically not allowed | 5468 // Immediate specifically not allowed |
5377 NeedsReg = true; | 5469 NeedsReg = true; |
5378 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 5470 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
5379 // On x86, FP constants are lowered to mem operands. | 5471 // On x86, FP constants are lowered to mem operands. |
5380 NeedsReg = true; | 5472 NeedsReg = true; |
5381 if (NeedsReg) { | 5473 if (NeedsReg) { |
5382 From = copyToReg(From, RegNum); | 5474 From = copyToReg(From, RegNum); |
5383 } | 5475 } |
5384 return From; | 5476 return From; |
5385 } | 5477 } |
5386 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 5478 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5387 // Check if the variable is guaranteed a physical register. This can happen | 5479 // Check if the variable is guaranteed a physical register. This can happen |
5388 // either when the variable is pre-colored or when it is assigned infinite | 5480 // either when the variable is pre-colored or when it is assigned infinite |
5389 // weight. | 5481 // weight. |
5390 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5482 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
5391 // We need a new physical register for the operand if: | 5483 // We need a new physical register for the operand if: |
5392 // Mem is not allowed and Var isn't guaranteed a physical | 5484 // Mem is not allowed and Var isn't guaranteed a physical |
5393 // register, or | 5485 // register, or |
5394 // RegNum is required and Var->getRegNum() doesn't match. | 5486 // RegNum is required and Var->getRegNum() doesn't match. |
5395 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5487 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
5396 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5488 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5631 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 5723 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
5632 Immediate->setShouldBePooled(true); | 5724 Immediate->setShouldBePooled(true); |
5633 // If we have already assigned a physical register, we must come from | 5725 // If we have already assigned a physical register, we must come from |
5634 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the | 5726 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
5635 // assigned register as this assignment is the start of its use-def | 5727 // assigned register as this assignment is the start of its use-def |
5636 // chain. So we add the RegNum argument here. | 5728 // chain. So we add the RegNum argument here. |
5637 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 5729 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
5638 IceString Label; | 5730 IceString Label; |
5639 llvm::raw_string_ostream Label_stream(Label); | 5731 llvm::raw_string_ostream Label_stream(Label); |
5640 Immediate->emitPoolLabel(Label_stream, Ctx); | 5732 Immediate->emitPoolLabel(Label_stream, Ctx); |
5641 const RelocOffsetT Offset = 0; | 5733 constexpr RelocOffsetT Offset = 0; |
5642 const bool SuppressMangling = true; | 5734 constexpr bool SuppressMangling = true; |
5643 Constant *Symbol = | 5735 Constant *Symbol = |
5644 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 5736 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
5645 typename Traits::X86OperandMem *MemOperand = | 5737 typename Traits::X86OperandMem *MemOperand = |
5646 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, | 5738 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, |
5647 Symbol); | 5739 Symbol); |
5648 _mov(Reg, MemOperand); | 5740 _mov(Reg, MemOperand); |
5649 return Reg; | 5741 return Reg; |
5650 } | 5742 } |
5651 assert("Unsupported -randomize-pool-immediates option" && false); | 5743 assert("Unsupported -randomize-pool-immediates option" && false); |
5652 } | 5744 } |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5728 // phi lowering, we should not ask for new physical registers in | 5820 // phi lowering, we should not ask for new physical registers in |
5729 // general. However, if we do meet a memory operand during phi lowering, | 5821 // general. However, if we do meet a memory operand during phi lowering, |
5730 // we should not blind or pool the immediates for now. | 5822 // we should not blind or pool the immediates for now. |
5731 if (RegNum != Variable::NoRegister) | 5823 if (RegNum != Variable::NoRegister) |
5732 return MemOperand; | 5824 return MemOperand; |
5733 Variable *RegTemp = makeReg(IceType_i32); | 5825 Variable *RegTemp = makeReg(IceType_i32); |
5734 IceString Label; | 5826 IceString Label; |
5735 llvm::raw_string_ostream Label_stream(Label); | 5827 llvm::raw_string_ostream Label_stream(Label); |
5736 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); | 5828 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); |
5737 MemOperand->getOffset()->setShouldBePooled(true); | 5829 MemOperand->getOffset()->setShouldBePooled(true); |
5738 const RelocOffsetT SymOffset = 0; | 5830 constexpr RelocOffsetT SymOffset = 0; |
5739 bool SuppressMangling = true; | 5831 constexpr bool SuppressMangling = true; |
5740 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 5832 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
5741 SuppressMangling); | 5833 SuppressMangling); |
5742 typename Traits::X86OperandMem *SymbolOperand = | 5834 typename Traits::X86OperandMem *SymbolOperand = |
5743 Traits::X86OperandMem::create( | 5835 Traits::X86OperandMem::create( |
5744 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | 5836 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); |
5745 _mov(RegTemp, SymbolOperand); | 5837 _mov(RegTemp, SymbolOperand); |
5746 // If we have a base variable here, we should add the lea instruction | 5838 // If we have a base variable here, we should add the lea instruction |
5747 // to add the value of the base variable to RegTemp. If there is no | 5839 // to add the value of the base variable to RegTemp. If there is no |
5748 // base variable, we won't need this lea instruction. | 5840 // base variable, we won't need this lea instruction. |
5749 if (MemOperand->getBase()) { | 5841 if (MemOperand->getBase()) { |
(...skipping 15 matching lines...) Expand all Loading... |
5765 } | 5857 } |
5766 // The offset is not eligible for blinding or pooling; return the original | 5858 // The offset is not eligible for blinding or pooling; return the original |
5767 // mem operand. | 5859 // mem operand. |
5768 return MemOperand; | 5860 return MemOperand; |
5769 } | 5861 } |
5770 | 5862 |
5771 } // end of namespace X86Internal | 5863 } // end of namespace X86Internal |
5772 } // end of namespace Ice | 5864 } // end of namespace Ice |
5773 | 5865 |
5774 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5866 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |