| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 70 // NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 71 // basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 72 // then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 73 uint32_t NumUses = 0; |
| 74 }; | 74 }; |
| 75 | 75 |
| 76 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
| 77 public: | 77 public: |
| 78 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
| 79 PK_None, | 79 PK_None, |
| 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
| 80 PK_Icmp32, | 81 PK_Icmp32, |
| 81 PK_Icmp64, | 82 PK_Icmp64, |
| 82 PK_Fcmp, | 83 PK_Fcmp, |
| 83 PK_Trunc | 84 PK_Trunc |
| 84 }; | 85 }; |
| 85 | 86 |
| 86 /// Currently the actual enum values are not used (other than CK_None), but we | 87 /// Currently the actual enum values are not used (other than CK_None), but we |
| 87 /// go ahead and produce them anyway for symmetry with the | 88 /// go ahead and produce them anyway for symmetry with the |
| 88 /// BoolFoldingProducerKind. | 89 /// BoolFoldingProducerKind. |
| 89 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 113 }; | 114 }; |
| 114 | 115 |
| 115 template <class MachineTraits> | 116 template <class MachineTraits> |
| 116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) |
| 117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
| 118 | 119 |
| 119 template <class MachineTraits> | 120 template <class MachineTraits> |
| 120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
| 122 if (llvm::isa<InstIcmp>(Instr)) { | 123 if (llvm::isa<InstIcmp>(Instr)) { |
| 123 if (Instr->getSrc(0)->getType() != IceType_i64) | 124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) |
| 124 return PK_Icmp32; | 125 return PK_Icmp32; |
| 125 return PK_None; // TODO(stichnot): actually PK_Icmp64; | 126 return PK_None; // TODO(stichnot): actually PK_Icmp64; |
| 126 } | 127 } |
| 127 return PK_None; // TODO(stichnot): remove this | 128 return PK_None; // TODO(stichnot): remove this |
| 128 | 129 |
| 129 if (llvm::isa<InstFcmp>(Instr)) | 130 if (llvm::isa<InstFcmp>(Instr)) |
| 130 return PK_Fcmp; | 131 return PK_Fcmp; |
| 131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 132 switch (Cast->getCastKind()) { | 133 switch (Cast->getCastKind()) { |
| 133 default: | 134 default: |
| (...skipping 502 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 636 // instruction or equivalent. | 637 // instruction or equivalent. |
| 637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 638 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| 638 // An InstLoad always qualifies. | 639 // An InstLoad always qualifies. |
| 639 LoadDest = Load->getDest(); | 640 LoadDest = Load->getDest(); |
| 640 const bool DoLegalize = false; | 641 const bool DoLegalize = false; |
| 641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 642 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
| 642 LoadDest->getType(), DoLegalize); | 643 LoadDest->getType(), DoLegalize); |
| 643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 644 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
| 644 // An AtomicLoad intrinsic qualifies as long as it has a valid | 645 // An AtomicLoad intrinsic qualifies as long as it has a valid |
| 645 // memory ordering, and can be implemented in a single | 646 // memory ordering, and can be implemented in a single |
| 646 // instruction (i.e., not i64). | 647 // instruction (i.e., not i64 on x86-32). |
| 647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 648 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
| 648 if (ID == Intrinsics::AtomicLoad && | 649 if (ID == Intrinsics::AtomicLoad && |
| 649 Intrin->getDest()->getType() != IceType_i64 && | 650 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
| 650 Intrinsics::isMemoryOrderValid( | 651 Intrinsics::isMemoryOrderValid( |
| 651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 652 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
| 652 LoadDest = Intrin->getDest(); | 653 LoadDest = Intrin->getDest(); |
| 653 const bool DoLegalize = false; | 654 const bool DoLegalize = false; |
| 654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 655 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
| 655 DoLegalize); | 656 DoLegalize); |
| 656 } | 657 } |
| 657 } | 658 } |
| 658 // A Load instruction can be folded into the following | 659 // A Load instruction can be folded into the following |
| 659 // instruction only if the following instruction ends the Load's | 660 // instruction only if the following instruction ends the Load's |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 717 template <class Machine> | 718 template <class Machine> |
| 718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 719 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 720 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { |
| 720 return Br->optimizeBranch(NextNode); | 721 return Br->optimizeBranch(NextNode); |
| 721 } | 722 } |
| 722 return false; | 723 return false; |
| 723 } | 724 } |
| 724 | 725 |
| 725 template <class Machine> | 726 template <class Machine> |
| 726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 727 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| 728 // Special case: never allow partial reads/writes to/from %rBP and %rSP. |
| 729 if (RegNum == Traits::RegisterSet::Reg_esp || |
| 730 RegNum == Traits::RegisterSet::Reg_ebp) |
| 731 Ty = Traits::WordType; |
| 727 if (Ty == IceType_void) | 732 if (Ty == IceType_void) |
| 728 Ty = IceType_i32; | 733 Ty = IceType_i32; |
| 729 if (PhysicalRegisters[Ty].empty()) | 734 if (PhysicalRegisters[Ty].empty()) |
| 730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 735 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
| 731 assert(RegNum < PhysicalRegisters[Ty].size()); | 736 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 737 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 733 if (Reg == nullptr) { | 738 if (Reg == nullptr) { |
| 734 Reg = Func->makeVariable(Ty); | 739 Reg = Func->makeVariable(Ty); |
| 735 Reg->setRegNum(RegNum); | 740 Reg->setRegNum(RegNum); |
| 736 PhysicalRegisters[Ty][RegNum] = Reg; | 741 PhysicalRegisters[Ty][RegNum] = Reg; |
| (...skipping 26 matching lines...) Expand all Loading... |
| 763 } | 768 } |
| 764 int32_t Offset = Var->getStackOffset(); | 769 int32_t Offset = Var->getStackOffset(); |
| 765 int32_t BaseRegNum = Var->getBaseRegNum(); | 770 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 766 if (BaseRegNum == Variable::NoRegister) { | 771 if (BaseRegNum == Variable::NoRegister) { |
| 767 BaseRegNum = getFrameOrStackReg(); | 772 BaseRegNum = getFrameOrStackReg(); |
| 768 if (!hasFramePointer()) | 773 if (!hasFramePointer()) |
| 769 Offset += getStackAdjustment(); | 774 Offset += getStackAdjustment(); |
| 770 } | 775 } |
| 771 if (Offset) | 776 if (Offset) |
| 772 Str << Offset; | 777 Str << Offset; |
| 773 const Type FrameSPTy = IceType_i32; | 778 const Type FrameSPTy = Traits::WordType; |
| 774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; | 779 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; |
| 775 } | 780 } |
| 776 | 781 |
| 777 template <class Machine> | 782 template <class Machine> |
| 778 typename TargetX86Base<Machine>::Traits::Address | 783 typename TargetX86Base<Machine>::Traits::Address |
| 779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 784 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { |
| 780 if (Var->hasReg()) | 785 if (Var->hasReg()) |
| 781 llvm_unreachable("Stack Variable has a register assigned"); | 786 llvm_unreachable("Stack Variable has a register assigned"); |
| 782 if (Var->getWeight().isInf()) { | 787 if (Var->getWeight().isInf()) { |
| 783 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 788 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 803 /// function generates an instruction to copy Arg into its assigned | 808 /// function generates an instruction to copy Arg into its assigned |
| 804 /// register if applicable. | 809 /// register if applicable. |
| 805 template <class Machine> | 810 template <class Machine> |
| 806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 811 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 807 Variable *FramePtr, | 812 Variable *FramePtr, |
| 808 size_t BasicFrameOffset, | 813 size_t BasicFrameOffset, |
| 809 size_t &InArgsSizeBytes) { | 814 size_t &InArgsSizeBytes) { |
| 810 Variable *Lo = Arg->getLo(); | 815 Variable *Lo = Arg->getLo(); |
| 811 Variable *Hi = Arg->getHi(); | 816 Variable *Hi = Arg->getHi(); |
| 812 Type Ty = Arg->getType(); | 817 Type Ty = Arg->getType(); |
| 813 if (Lo && Hi && Ty == IceType_i64) { | 818 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { |
| 814 // TODO(jpp): This special case is not needed for x86-64. | |
| 815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 819 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 820 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 821 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 822 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 819 return; | 823 return; |
| 820 } | 824 } |
| 821 if (isVectorType(Ty)) { | 825 if (isVectorType(Ty)) { |
| 822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 826 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 823 } | 827 } |
| 824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 828 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 829 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 826 if (Arg->hasReg()) { | 830 if (Arg->hasReg()) { |
| 827 assert(Ty != IceType_i64); | 831 assert(Ty != IceType_i64 || Traits::Is64Bit); |
| 828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 832 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
| 829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 833 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
| 830 if (isVectorType(Arg->getType())) { | 834 if (isVectorType(Arg->getType())) { |
| 831 _movp(Arg, Mem); | 835 _movp(Arg, Mem); |
| 832 } else { | 836 } else { |
| 833 _mov(Arg, Mem); | 837 _mov(Arg, Mem); |
| 834 } | 838 } |
| 835 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 839 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
| 836 // operand instead of a Variable, so its fill-from-stack operation has to be | 840 // operand instead of a Variable, so its fill-from-stack operation has to be |
| 837 // tracked separately for statistics. | 841 // tracked separately for statistics. |
| 838 Ctx->statsUpdateFills(); | 842 Ctx->statsUpdateFills(); |
| 839 } | 843 } |
| 840 } | 844 } |
| 841 | 845 |
| 842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 846 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 843 // TODO(jpp): this is wrong for x86-64. | 847 return Traits::WordType; |
| 844 return IceType_i32; | |
| 845 } | 848 } |
| 846 | 849 |
| 847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 850 template <class Machine> |
| 851 template <typename T> |
| 852 typename std::enable_if<!T::Is64Bit, void>::type |
| 853 TargetX86Base<Machine>::split64(Variable *Var) { |
| 848 switch (Var->getType()) { | 854 switch (Var->getType()) { |
| 849 default: | 855 default: |
| 850 return; | 856 return; |
| 851 case IceType_i64: | 857 case IceType_i64: |
| 852 // TODO: Only consider F64 if we need to push each half when | 858 // TODO: Only consider F64 if we need to push each half when |
| 853 // passing as an argument to a function call. Note that each half | 859 // passing as an argument to a function call. Note that each half |
| 854 // is still typed as I32. | 860 // is still typed as I32. |
| 855 case IceType_f64: | 861 case IceType_f64: |
| 856 break; | 862 break; |
| 857 } | 863 } |
| (...skipping 11 matching lines...) Expand all Loading... |
| 869 Hi->setName(Func, Var->getName(Func) + "__hi"); | 875 Hi->setName(Func, Var->getName(Func) + "__hi"); |
| 870 } | 876 } |
| 871 Var->setLoHi(Lo, Hi); | 877 Var->setLoHi(Lo, Hi); |
| 872 if (Var->getIsArg()) { | 878 if (Var->getIsArg()) { |
| 873 Lo->setIsArg(); | 879 Lo->setIsArg(); |
| 874 Hi->setIsArg(); | 880 Hi->setIsArg(); |
| 875 } | 881 } |
| 876 } | 882 } |
| 877 | 883 |
| 878 template <class Machine> | 884 template <class Machine> |
| 879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { | 885 template <typename T> |
| 886 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 887 TargetX86Base<Machine>::loOperand(Operand *Operand) { |
| 880 assert(Operand->getType() == IceType_i64 || | 888 assert(Operand->getType() == IceType_i64 || |
| 881 Operand->getType() == IceType_f64); | 889 Operand->getType() == IceType_f64); |
| 882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 890 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 883 return Operand; | 891 return Operand; |
| 884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 892 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 885 split64(Var); | 893 split64(Var); |
| 886 return Var->getLo(); | 894 return Var->getLo(); |
| 887 } | 895 } |
| 888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 896 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 897 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 898 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 891 // Check if we need to blind/pool the constant. | 899 // Check if we need to blind/pool the constant. |
| 892 return legalize(ConstInt); | 900 return legalize(ConstInt); |
| 893 } | 901 } |
| 894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 902 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
| 895 auto *MemOperand = Traits::X86OperandMem::create( | 903 auto *MemOperand = Traits::X86OperandMem::create( |
| 896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 904 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 897 Mem->getShift(), Mem->getSegmentRegister()); | 905 Mem->getShift(), Mem->getSegmentRegister()); |
| 898 // Test if we should randomize or pool the offset, if so randomize it or | 906 // Test if we should randomize or pool the offset, if so randomize it or |
| 899 // pool it then create mem operand with the blinded/pooled constant. | 907 // pool it then create mem operand with the blinded/pooled constant. |
| 900 // Otherwise, return the mem operand as ordinary mem operand. | 908 // Otherwise, return the mem operand as ordinary mem operand. |
| 901 return legalize(MemOperand); | 909 return legalize(MemOperand); |
| 902 } | 910 } |
| 903 llvm_unreachable("Unsupported operand type"); | 911 llvm_unreachable("Unsupported operand type"); |
| 904 return nullptr; | 912 return nullptr; |
| 905 } | 913 } |
| 906 | 914 |
| 907 template <class Machine> | 915 template <class Machine> |
| 908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 916 template <typename T> |
| 917 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 918 TargetX86Base<Machine>::hiOperand(Operand *Operand) { |
| 909 assert(Operand->getType() == IceType_i64 || | 919 assert(Operand->getType() == IceType_i64 || |
| 910 Operand->getType() == IceType_f64); | 920 Operand->getType() == IceType_f64); |
| 911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 921 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 912 return Operand; | 922 return Operand; |
| 913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 923 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 914 split64(Var); | 924 split64(Var); |
| 915 return Var->getHi(); | 925 return Var->getHi(); |
| 916 } | 926 } |
| 917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 927 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 928 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1110 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| 1101 Variable *Dest = Inst->getDest(); | 1111 Variable *Dest = Inst->getDest(); |
| 1102 Operand *Src0 = legalize(Inst->getSrc(0)); | 1112 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1103 Operand *Src1 = legalize(Inst->getSrc(1)); | 1113 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1104 if (Inst->isCommutative()) { | 1114 if (Inst->isCommutative()) { |
| 1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1115 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
| 1106 std::swap(Src0, Src1); | 1116 std::swap(Src0, Src1); |
| 1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | 1117 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
| 1108 std::swap(Src0, Src1); | 1118 std::swap(Src0, Src1); |
| 1109 } | 1119 } |
| 1110 if (Dest->getType() == IceType_i64) { | 1120 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1111 // These helper-call-involved instructions are lowered in this | 1121 // These x86-32 helper-call-involved instructions are lowered in this |
| 1112 // separate switch. This is because loOperand() and hiOperand() | 1122 // separate switch. This is because loOperand() and hiOperand() |
| 1113 // may insert redundant instructions for constant blinding and | 1123 // may insert redundant instructions for constant blinding and |
| 1114 // pooling. Such redundant instructions will fail liveness analysis | 1124 // pooling. Such redundant instructions will fail liveness analysis |
| 1115 // under -Om1 setting. And, actually these arguments do not need | 1125 // under -Om1 setting. And, actually these arguments do not need |
| 1116 // to be processed with loOperand() and hiOperand() to be used. | 1126 // to be processed with loOperand() and hiOperand() to be used. |
| 1117 switch (Inst->getOp()) { | 1127 switch (Inst->getOp()) { |
| 1118 case InstArithmetic::Udiv: { | 1128 case InstArithmetic::Udiv: { |
| 1119 const SizeT MaxSrcs = 2; | 1129 const SizeT MaxSrcs = 2; |
| 1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1130 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
| 1121 Call->addArg(Inst->getSrc(0)); | 1131 Call->addArg(Inst->getSrc(0)); |
| (...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1659 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1650 // this ever becomes a problem we can introduce a pseudo rem instruction | 1660 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1651 // that returns the remainder in %al directly (and uses a mov for copying | 1661 // that returns the remainder in %al directly (and uses a mov for copying |
| 1652 // %ah to %al.) | 1662 // %ah to %al.) |
| 1653 static constexpr uint8_t AlSizeInBits = 8; | 1663 static constexpr uint8_t AlSizeInBits = 8; |
| 1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1664 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1655 _mov(Dest, T); | 1665 _mov(Dest, T); |
| 1656 Context.insert(InstFakeUse::create(Func, T_eax)); | 1666 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1657 } else { | 1667 } else { |
| 1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1668 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1669 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); |
| 1670 _mov(T_edx, Zero); |
| 1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1671 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1661 _div(T_edx, Src1, T); | 1672 _div(T_edx, Src1, T); |
| 1662 _mov(Dest, T_edx); | 1673 _mov(Dest, T_edx); |
| 1663 } | 1674 } |
| 1664 break; | 1675 break; |
| 1665 case InstArithmetic::Srem: | 1676 case InstArithmetic::Srem: |
| 1666 // TODO(stichnot): Enable this after doing better performance | 1677 // TODO(stichnot): Enable this after doing better performance |
| 1667 // and cross testing. | 1678 // and cross testing. |
| 1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1679 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1669 // Optimize mod by constant power of 2, but not for Om1 or O0, | 1680 // Optimize mod by constant power of 2, but not for Om1 or O0, |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1725 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1726 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1716 // this ever becomes a problem we can introduce a pseudo rem instruction | 1727 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1717 // that returns the remainder in %al directly (and uses a mov for copying | 1728 // that returns the remainder in %al directly (and uses a mov for copying |
| 1718 // %ah to %al.) | 1729 // %ah to %al.) |
| 1719 static constexpr uint8_t AlSizeInBits = 8; | 1730 static constexpr uint8_t AlSizeInBits = 8; |
| 1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1731 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1721 _mov(Dest, T); | 1732 _mov(Dest, T); |
| 1722 Context.insert(InstFakeUse::create(Func, T_eax)); | 1733 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1723 } else { | 1734 } else { |
| 1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1735 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); |
| 1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1726 _cbwdq(T_edx, T); | 1737 _cbwdq(T_edx, T); |
| 1727 _idiv(T_edx, Src1, T); | 1738 _idiv(T_edx, Src1, T); |
| 1728 _mov(Dest, T_edx); | 1739 _mov(Dest, T_edx); |
| 1729 } | 1740 } |
| 1730 break; | 1741 break; |
| 1731 case InstArithmetic::Fadd: | 1742 case InstArithmetic::Fadd: |
| 1732 _mov(T, Src0); | 1743 _mov(T, Src0); |
| 1733 _addss(T, Src1); | 1744 _addss(T, Src1); |
| 1734 _mov(Dest, T); | 1745 _mov(Dest, T); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 1758 return lowerCall(Call); | 1769 return lowerCall(Call); |
| 1759 } | 1770 } |
| 1760 } | 1771 } |
| 1761 } | 1772 } |
| 1762 | 1773 |
| 1763 template <class Machine> | 1774 template <class Machine> |
| 1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1775 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| 1765 Variable *Dest = Inst->getDest(); | 1776 Variable *Dest = Inst->getDest(); |
| 1766 Operand *Src0 = Inst->getSrc(0); | 1777 Operand *Src0 = Inst->getSrc(0); |
| 1767 assert(Dest->getType() == Src0->getType()); | 1778 assert(Dest->getType() == Src0->getType()); |
| 1768 if (Dest->getType() == IceType_i64) { | 1779 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1769 Src0 = legalize(Src0); | 1780 Src0 = legalize(Src0); |
| 1770 Operand *Src0Lo = loOperand(Src0); | 1781 Operand *Src0Lo = loOperand(Src0); |
| 1771 Operand *Src0Hi = hiOperand(Src0); | 1782 Operand *Src0Hi = hiOperand(Src0); |
| 1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1783 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1784 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1785 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 1775 _mov(T_Lo, Src0Lo); | 1786 _mov(T_Lo, Src0Lo); |
| 1776 _mov(DestLo, T_Lo); | 1787 _mov(DestLo, T_Lo); |
| 1777 _mov(T_Hi, Src0Hi); | 1788 _mov(T_Hi, Src0Hi); |
| 1778 _mov(DestHi, T_Hi); | 1789 _mov(DestHi, T_Hi); |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1863 SizeT ShiftAmount = | 1874 SizeT ShiftAmount = |
| 1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 1875 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 1865 1; | 1876 1; |
| 1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 1877 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 1867 Variable *T = makeReg(DestTy); | 1878 Variable *T = makeReg(DestTy); |
| 1868 _movp(T, Src0RM); | 1879 _movp(T, Src0RM); |
| 1869 _psll(T, ShiftConstant); | 1880 _psll(T, ShiftConstant); |
| 1870 _psra(T, ShiftConstant); | 1881 _psra(T, ShiftConstant); |
| 1871 _movp(Dest, T); | 1882 _movp(Dest, T); |
| 1872 } | 1883 } |
| 1873 } else if (Dest->getType() == IceType_i64) { | 1884 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 1885 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
| 1875 Constant *Shift = Ctx->getConstantInt32(31); | 1886 Constant *Shift = Ctx->getConstantInt32(31); |
| 1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1887 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1888 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1878 Variable *T_Lo = makeReg(DestLo->getType()); | 1889 Variable *T_Lo = makeReg(DestLo->getType()); |
| 1879 if (Src0RM->getType() == IceType_i32) { | 1890 if (Src0RM->getType() == IceType_i32) { |
| 1880 _mov(T_Lo, Src0RM); | 1891 _mov(T_Lo, Src0RM); |
| 1881 } else if (Src0RM->getType() == IceType_i1) { | 1892 } else if (Src0RM->getType() == IceType_i1) { |
| 1882 _movzx(T_Lo, Src0RM); | 1893 _movzx(T_Lo, Src0RM); |
| 1883 _shl(T_Lo, Shift); | 1894 _shl(T_Lo, Shift); |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1923 case InstCast::Zext: { | 1934 case InstCast::Zext: { |
| 1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1935 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 1925 if (isVectorType(Dest->getType())) { | 1936 if (isVectorType(Dest->getType())) { |
| 1926 // onemask = materialize(1,1,...); dest = onemask & src | 1937 // onemask = materialize(1,1,...); dest = onemask & src |
| 1927 Type DestTy = Dest->getType(); | 1938 Type DestTy = Dest->getType(); |
| 1928 Variable *OneMask = makeVectorOfOnes(DestTy); | 1939 Variable *OneMask = makeVectorOfOnes(DestTy); |
| 1929 Variable *T = makeReg(DestTy); | 1940 Variable *T = makeReg(DestTy); |
| 1930 _movp(T, Src0RM); | 1941 _movp(T, Src0RM); |
| 1931 _pand(T, OneMask); | 1942 _pand(T, OneMask); |
| 1932 _movp(Dest, T); | 1943 _movp(Dest, T); |
| 1933 } else if (Dest->getType() == IceType_i64) { | 1944 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1934 // t1=movzx src; dst.lo=t1; dst.hi=0 | 1945 // t1=movzx src; dst.lo=t1; dst.hi=0 |
| 1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1946 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1947 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1948 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1938 Variable *Tmp = makeReg(DestLo->getType()); | 1949 Variable *Tmp = makeReg(DestLo->getType()); |
| 1939 if (Src0RM->getType() == IceType_i32) { | 1950 if (Src0RM->getType() == IceType_i32) { |
| 1940 _mov(Tmp, Src0RM); | 1951 _mov(Tmp, Src0RM); |
| 1941 } else { | 1952 } else { |
| 1942 _movzx(Tmp, Src0RM); | 1953 _movzx(Tmp, Src0RM); |
| 1943 } | 1954 } |
| 1944 if (Src0RM->getType() == IceType_i1) { | 1955 if (Src0RM->getType() == IceType_i1) { |
| 1945 Constant *One = Ctx->getConstantInt32(1); | 1956 Constant *One = Ctx->getConstantInt32(1); |
| 1946 _and(Tmp, One); | 1957 _and(Tmp, One); |
| 1947 } | 1958 } |
| 1948 _mov(DestLo, Tmp); | 1959 _mov(DestLo, Tmp); |
| 1949 _mov(DestHi, Zero); | 1960 _mov(DestHi, Zero); |
| 1950 } else if (Src0RM->getType() == IceType_i1) { | 1961 } else if (Src0RM->getType() == IceType_i1) { |
| 1951 // t = Src0RM; t &= 1; Dest = t | 1962 // t = Src0RM; t &= 1; Dest = t |
| 1952 Constant *One = Ctx->getConstantInt32(1); | 1963 Constant *One = Ctx->getConstantInt32(1); |
| 1953 Type DestTy = Dest->getType(); | 1964 Type DestTy = Dest->getType(); |
| 1954 Variable *T; | 1965 Variable *T = nullptr; |
| 1955 if (DestTy == IceType_i8) { | 1966 if (DestTy == IceType_i8) { |
| 1956 T = makeReg(DestTy); | |
| 1957 _mov(T, Src0RM); | 1967 _mov(T, Src0RM); |
| 1958 } else { | 1968 } else { |
| 1969 assert(DestTy != IceType_i1); |
| 1970 assert(Traits::Is64Bit || DestTy != IceType_i64); |
| 1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. | 1971 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. |
| 1960 T = makeReg(IceType_i32); | 1972 // In x86-64 we need to widen T to 64-bits to ensure that T -- if |
| 1973 // written to the stack (i.e., in -Om1) -- will be fully zero-extended. |
| 1974 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32); |
| 1961 _movzx(T, Src0RM); | 1975 _movzx(T, Src0RM); |
| 1962 } | 1976 } |
| 1963 _and(T, One); | 1977 _and(T, One); |
| 1964 _mov(Dest, T); | 1978 _mov(Dest, T); |
| 1965 } else { | 1979 } else { |
| 1966 // t1 = movzx src; dst = t1 | 1980 // t1 = movzx src; dst = t1 |
| 1967 Variable *T = makeReg(Dest->getType()); | 1981 Variable *T = makeReg(Dest->getType()); |
| 1968 _movzx(T, Src0RM); | 1982 _movzx(T, Src0RM); |
| 1969 _mov(Dest, T); | 1983 _mov(Dest, T); |
| 1970 } | 1984 } |
| 1971 break; | 1985 break; |
| 1972 } | 1986 } |
| 1973 case InstCast::Trunc: { | 1987 case InstCast::Trunc: { |
| 1974 if (isVectorType(Dest->getType())) { | 1988 if (isVectorType(Dest->getType())) { |
| 1975 // onemask = materialize(1,1,...); dst = src & onemask | 1989 // onemask = materialize(1,1,...); dst = src & onemask |
| 1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1990 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 1977 Type Src0Ty = Src0RM->getType(); | 1991 Type Src0Ty = Src0RM->getType(); |
| 1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); | 1992 Variable *OneMask = makeVectorOfOnes(Src0Ty); |
| 1979 Variable *T = makeReg(Dest->getType()); | 1993 Variable *T = makeReg(Dest->getType()); |
| 1980 _movp(T, Src0RM); | 1994 _movp(T, Src0RM); |
| 1981 _pand(T, OneMask); | 1995 _pand(T, OneMask); |
| 1982 _movp(Dest, T); | 1996 _movp(Dest, T); |
| 1983 } else { | 1997 } else { |
| 1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1998 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 1985 if (Src0->getType() == IceType_i64) | 1999 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) |
| 1986 Src0 = loOperand(Src0); | 2000 Src0 = loOperand(Src0); |
| 1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2001 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 1988 // t1 = trunc Src0RM; Dest = t1 | 2002 // t1 = trunc Src0RM; Dest = t1 |
| 1989 Variable *T = nullptr; | 2003 Variable *T = nullptr; |
| 1990 _mov(T, Src0RM); | 2004 _mov(T, Src0RM); |
| 1991 if (Dest->getType() == IceType_i1) | 2005 if (Dest->getType() == IceType_i1) |
| 1992 _and(T, Ctx->getConstantInt1(1)); | 2006 _and(T, Ctx->getConstantInt1(1)); |
| 1993 _mov(Dest, T); | 2007 _mov(Dest, T); |
| 1994 } | 2008 } |
| 1995 break; | 2009 break; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 2006 case InstCast::Fptosi: | 2020 case InstCast::Fptosi: |
| 2007 if (isVectorType(Dest->getType())) { | 2021 if (isVectorType(Dest->getType())) { |
| 2008 assert(Dest->getType() == IceType_v4i32 && | 2022 assert(Dest->getType() == IceType_v4i32 && |
| 2009 Inst->getSrc(0)->getType() == IceType_v4f32); | 2023 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2024 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2025 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2012 Src0RM = legalizeToReg(Src0RM); | 2026 Src0RM = legalizeToReg(Src0RM); |
| 2013 Variable *T = makeReg(Dest->getType()); | 2027 Variable *T = makeReg(Dest->getType()); |
| 2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2028 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2015 _movp(Dest, T); | 2029 _movp(Dest, T); |
| 2016 } else if (Dest->getType() == IceType_i64) { | 2030 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2017 // Use a helper for converting floating-point values to 64-bit | 2031 // Use a helper for converting floating-point values to 64-bit |
| 2018 // integers. SSE2 appears to have no way to convert from xmm | 2032 // integers. SSE2 appears to have no way to convert from xmm |
| 2019 // registers to something like the edx:eax register pair, and | 2033 // registers to something like the edx:eax register pair, and |
| 2020 // gcc and clang both want to use x87 instructions complete with | 2034 // gcc and clang both want to use x87 instructions complete with |
| 2021 // temporary manipulation of the status word. This helper is | 2035 // temporary manipulation of the status word. This helper is |
| 2022 // not needed for x86-64. | 2036 // not needed for x86-64. |
| 2023 split64(Dest); | 2037 split64(Dest); |
| 2024 const SizeT MaxSrcs = 1; | 2038 const SizeT MaxSrcs = 1; |
| 2025 Type SrcType = Inst->getSrc(0)->getType(); | 2039 Type SrcType = Inst->getSrc(0)->getType(); |
| 2026 InstCall *Call = | 2040 InstCall *Call = |
| 2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2041 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 2028 : H_fptosi_f64_i64, | 2042 : H_fptosi_f64_i64, |
| 2029 Dest, MaxSrcs); | 2043 Dest, MaxSrcs); |
| 2030 Call->addArg(Inst->getSrc(0)); | 2044 Call->addArg(Inst->getSrc(0)); |
| 2031 lowerCall(Call); | 2045 lowerCall(Call); |
| 2032 } else { | 2046 } else { |
| 2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2047 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2048 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2035 Variable *T_1 = makeReg(IceType_i32); | 2049 Variable *T_1 = nullptr; |
| 2050 if (Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2051 T_1 = makeReg(IceType_i64); |
| 2052 } else { |
| 2053 assert(Dest->getType() != IceType_i64); |
| 2054 T_1 = makeReg(IceType_i32); |
| 2055 } |
| 2056 // cvt() requires its integer argument to be a GPR. |
| 2057 T_1->setWeightInfinite(); |
| 2036 Variable *T_2 = makeReg(Dest->getType()); | 2058 Variable *T_2 = makeReg(Dest->getType()); |
| 2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2059 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2060 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2039 if (Dest->getType() == IceType_i1) | 2061 if (Dest->getType() == IceType_i1) |
| 2040 _and(T_2, Ctx->getConstantInt1(1)); | 2062 _and(T_2, Ctx->getConstantInt1(1)); |
| 2041 _mov(Dest, T_2); | 2063 _mov(Dest, T_2); |
| 2042 } | 2064 } |
| 2043 break; | 2065 break; |
| 2044 case InstCast::Fptoui: | 2066 case InstCast::Fptoui: |
| 2045 if (isVectorType(Dest->getType())) { | 2067 if (isVectorType(Dest->getType())) { |
| 2046 assert(Dest->getType() == IceType_v4i32 && | 2068 assert(Dest->getType() == IceType_v4i32 && |
| 2047 Inst->getSrc(0)->getType() == IceType_v4f32); | 2069 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2048 const SizeT MaxSrcs = 1; | 2070 const SizeT MaxSrcs = 1; |
| 2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2071 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
| 2050 Call->addArg(Inst->getSrc(0)); | 2072 Call->addArg(Inst->getSrc(0)); |
| 2051 lowerCall(Call); | 2073 lowerCall(Call); |
| 2052 } else if (Dest->getType() == IceType_i64 || | 2074 } else if (Dest->getType() == IceType_i64 || |
| 2053 Dest->getType() == IceType_i32) { | 2075 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
| 2054 // Use a helper for both x86-32 and x86-64. | 2076 // Use a helper for both x86-32 and x86-64. |
| 2055 split64(Dest); | 2077 if (!Traits::Is64Bit) |
| 2078 split64(Dest); |
| 2056 const SizeT MaxSrcs = 1; | 2079 const SizeT MaxSrcs = 1; |
| 2057 Type DestType = Dest->getType(); | 2080 Type DestType = Dest->getType(); |
| 2058 Type SrcType = Inst->getSrc(0)->getType(); | 2081 Type SrcType = Inst->getSrc(0)->getType(); |
| 2059 IceString TargetString; | 2082 IceString TargetString; |
| 2060 if (isInt32Asserting32Or64(DestType)) { | 2083 if (Traits::Is64Bit) { |
| 2084 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2085 : H_fptoui_f64_i64; |
| 2086 } else if (isInt32Asserting32Or64(DestType)) { |
| 2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2087 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
| 2062 : H_fptoui_f64_i32; | 2088 : H_fptoui_f64_i32; |
| 2063 } else { | 2089 } else { |
| 2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2090 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2065 : H_fptoui_f64_i64; | 2091 : H_fptoui_f64_i64; |
| 2066 } | 2092 } |
| 2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2093 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2068 Call->addArg(Inst->getSrc(0)); | 2094 Call->addArg(Inst->getSrc(0)); |
| 2069 lowerCall(Call); | 2095 lowerCall(Call); |
| 2070 return; | 2096 return; |
| 2071 } else { | 2097 } else { |
| 2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2099 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2074 Variable *T_1 = makeReg(IceType_i32); | 2100 assert(Dest->getType() != IceType_i64); |
| 2101 Variable *T_1 = nullptr; |
| 2102 if (Traits::Is64Bit && Dest->getType() == IceType_i32) { |
| 2103 T_1 = makeReg(IceType_i64); |
| 2104 } else { |
| 2105 assert(Dest->getType() != IceType_i32); |
| 2106 T_1 = makeReg(IceType_i32); |
| 2107 } |
| 2108 T_1->setWeightInfinite(); |
| 2075 Variable *T_2 = makeReg(Dest->getType()); | 2109 Variable *T_2 = makeReg(Dest->getType()); |
| 2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2110 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2111 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2078 if (Dest->getType() == IceType_i1) | 2112 if (Dest->getType() == IceType_i1) |
| 2079 _and(T_2, Ctx->getConstantInt1(1)); | 2113 _and(T_2, Ctx->getConstantInt1(1)); |
| 2080 _mov(Dest, T_2); | 2114 _mov(Dest, T_2); |
| 2081 } | 2115 } |
| 2082 break; | 2116 break; |
| 2083 case InstCast::Sitofp: | 2117 case InstCast::Sitofp: |
| 2084 if (isVectorType(Dest->getType())) { | 2118 if (isVectorType(Dest->getType())) { |
| 2085 assert(Dest->getType() == IceType_v4f32 && | 2119 assert(Dest->getType() == IceType_v4f32 && |
| 2086 Inst->getSrc(0)->getType() == IceType_v4i32); | 2120 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2121 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2122 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2089 Src0RM = legalizeToReg(Src0RM); | 2123 Src0RM = legalizeToReg(Src0RM); |
| 2090 Variable *T = makeReg(Dest->getType()); | 2124 Variable *T = makeReg(Dest->getType()); |
| 2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2125 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2092 _movp(Dest, T); | 2126 _movp(Dest, T); |
| 2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { | 2127 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2094 // Use a helper for x86-32. | 2128 // Use a helper for x86-32. |
| 2095 const SizeT MaxSrcs = 1; | 2129 const SizeT MaxSrcs = 1; |
| 2096 Type DestType = Dest->getType(); | 2130 Type DestType = Dest->getType(); |
| 2097 InstCall *Call = | 2131 InstCall *Call = |
| 2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2132 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
| 2099 : H_sitofp_i64_f64, | 2133 : H_sitofp_i64_f64, |
| 2100 Dest, MaxSrcs); | 2134 Dest, MaxSrcs); |
| 2101 // TODO: Call the correct compiler-rt helper function. | 2135 // TODO: Call the correct compiler-rt helper function. |
| 2102 Call->addArg(Inst->getSrc(0)); | 2136 Call->addArg(Inst->getSrc(0)); |
| 2103 lowerCall(Call); | 2137 lowerCall(Call); |
| 2104 return; | 2138 return; |
| 2105 } else { | 2139 } else { |
| 2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2140 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2107 // Sign-extend the operand. | 2141 // Sign-extend the operand. |
| 2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2142 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2109 Variable *T_1 = makeReg(IceType_i32); | 2143 Variable *T_1 = nullptr; |
| 2144 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { |
| 2145 T_1 = makeReg(IceType_i64); |
| 2146 } else { |
| 2147 assert(Src0RM->getType() != IceType_i64); |
| 2148 T_1 = makeReg(IceType_i32); |
| 2149 } |
| 2150 T_1->setWeightInfinite(); |
| 2110 Variable *T_2 = makeReg(Dest->getType()); | 2151 Variable *T_2 = makeReg(Dest->getType()); |
| 2111 if (Src0RM->getType() == IceType_i32) | 2152 if (Src0RM->getType() == T_1->getType()) |
| 2112 _mov(T_1, Src0RM); | 2153 _mov(T_1, Src0RM); |
| 2113 else | 2154 else |
| 2114 _movsx(T_1, Src0RM); | 2155 _movsx(T_1, Src0RM); |
| 2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2116 _mov(Dest, T_2); | 2157 _mov(Dest, T_2); |
| 2117 } | 2158 } |
| 2118 break; | 2159 break; |
| 2119 case InstCast::Uitofp: { | 2160 case InstCast::Uitofp: { |
| 2120 Operand *Src0 = Inst->getSrc(0); | 2161 Operand *Src0 = Inst->getSrc(0); |
| 2121 if (isVectorType(Src0->getType())) { | 2162 if (isVectorType(Src0->getType())) { |
| 2122 assert(Dest->getType() == IceType_v4f32 && | 2163 assert(Dest->getType() == IceType_v4f32 && |
| 2123 Src0->getType() == IceType_v4i32); | 2164 Src0->getType() == IceType_v4i32); |
| 2124 const SizeT MaxSrcs = 1; | 2165 const SizeT MaxSrcs = 1; |
| 2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2166 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
| 2126 Call->addArg(Src0); | 2167 Call->addArg(Src0); |
| 2127 lowerCall(Call); | 2168 lowerCall(Call); |
| 2128 } else if (Src0->getType() == IceType_i64 || | 2169 } else if (Src0->getType() == IceType_i64 || |
| 2129 Src0->getType() == IceType_i32) { | 2170 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| 2130 // Use a helper for x86-32 and x86-64. Also use a helper for | 2171 // Use a helper for x86-32 and x86-64. Also use a helper for |
| 2131 // i32 on x86-32. | 2172 // i32 on x86-32. |
| 2132 const SizeT MaxSrcs = 1; | 2173 const SizeT MaxSrcs = 1; |
| 2133 Type DestType = Dest->getType(); | 2174 Type DestType = Dest->getType(); |
| 2134 IceString TargetString; | 2175 IceString TargetString; |
| 2135 if (isInt32Asserting32Or64(Src0->getType())) { | 2176 if (isInt32Asserting32Or64(Src0->getType())) { |
| 2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2177 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
| 2137 : H_uitofp_i32_f64; | 2178 : H_uitofp_i32_f64; |
| 2138 } else { | 2179 } else { |
| 2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2180 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
| 2140 : H_uitofp_i64_f64; | 2181 : H_uitofp_i64_f64; |
| 2141 } | 2182 } |
| 2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2183 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2143 Call->addArg(Src0); | 2184 Call->addArg(Src0); |
| 2144 lowerCall(Call); | 2185 lowerCall(Call); |
| 2145 return; | 2186 return; |
| 2146 } else { | 2187 } else { |
| 2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2188 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2148 // Zero-extend the operand. | 2189 // Zero-extend the operand. |
| 2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2190 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2150 Variable *T_1 = makeReg(IceType_i32); | 2191 Variable *T_1 = nullptr; |
| 2192 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { |
| 2193 T_1 = makeReg(IceType_i64); |
| 2194 } else { |
| 2195 assert(Src0RM->getType() != IceType_i64); |
| 2196 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); |
| 2197 T_1 = makeReg(IceType_i32); |
| 2198 } |
| 2199 T_1->setWeightInfinite(); |
| 2151 Variable *T_2 = makeReg(Dest->getType()); | 2200 Variable *T_2 = makeReg(Dest->getType()); |
| 2152 if (Src0RM->getType() == IceType_i32) | 2201 if (Src0RM->getType() == T_1->getType()) |
| 2153 _mov(T_1, Src0RM); | 2202 _mov(T_1, Src0RM); |
| 2154 else | 2203 else |
| 2155 _movzx(T_1, Src0RM); | 2204 _movzx(T_1, Src0RM); |
| 2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2205 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2157 _mov(Dest, T_2); | 2206 _mov(Dest, T_2); |
| 2158 } | 2207 } |
| 2159 break; | 2208 break; |
| 2160 } | 2209 } |
| 2161 case InstCast::Bitcast: { | 2210 case InstCast::Bitcast: { |
| 2162 Operand *Src0 = Inst->getSrc(0); | 2211 Operand *Src0 = Inst->getSrc(0); |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2198 typename Traits::SpillVariable *SpillVar = | 2247 typename Traits::SpillVariable *SpillVar = |
| 2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); | 2248 Func->makeVariable<typename Traits::SpillVariable>(SrcType); |
| 2200 SpillVar->setLinkedTo(Dest); | 2249 SpillVar->setLinkedTo(Dest); |
| 2201 Variable *Spill = SpillVar; | 2250 Variable *Spill = SpillVar; |
| 2202 Spill->setWeight(RegWeight::Zero); | 2251 Spill->setWeight(RegWeight::Zero); |
| 2203 _mov(T, Src0RM); | 2252 _mov(T, Src0RM); |
| 2204 _mov(Spill, T); | 2253 _mov(Spill, T); |
| 2205 _mov(Dest, Spill); | 2254 _mov(Dest, Spill); |
| 2206 } break; | 2255 } break; |
| 2207 case IceType_i64: { | 2256 case IceType_i64: { |
| 2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2257 assert(Src0->getType() == IceType_f64); |
| 2209 assert(Src0RM->getType() == IceType_f64); | 2258 if (Traits::Is64Bit) { |
| 2210 // a.i64 = bitcast b.f64 ==> | 2259 // Movd requires its fp argument (in this case, the bitcast source) to |
| 2211 // s.f64 = spill b.f64 | 2260 // be an xmm register. |
| 2212 // t_lo.i32 = lo(s.f64) | 2261 Variable *Src0R = legalizeToReg(Src0); |
| 2213 // a_lo.i32 = t_lo.i32 | 2262 Variable *T = makeReg(IceType_i64); |
| 2214 // t_hi.i32 = hi(s.f64) | 2263 _movd(T, Src0R); |
| 2215 // a_hi.i32 = t_hi.i32 | 2264 _mov(Dest, T); |
| 2216 Operand *SpillLo, *SpillHi; | 2265 } else { |
| 2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | 2266 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2267 // a.i64 = bitcast b.f64 ==> |
| 2268 // s.f64 = spill b.f64 |
| 2269 // t_lo.i32 = lo(s.f64) |
| 2270 // a_lo.i32 = t_lo.i32 |
| 2271 // t_hi.i32 = hi(s.f64) |
| 2272 // a_hi.i32 = t_hi.i32 |
| 2273 Operand *SpillLo, *SpillHi; |
| 2274 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { |
| 2275 typename Traits::SpillVariable *SpillVar = |
| 2276 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); |
| 2277 SpillVar->setLinkedTo(Src0Var); |
| 2278 Variable *Spill = SpillVar; |
| 2279 Spill->setWeight(RegWeight::Zero); |
| 2280 _movq(Spill, Src0RM); |
| 2281 SpillLo = Traits::VariableSplit::create(Func, Spill, |
| 2282 Traits::VariableSplit::Low); |
| 2283 SpillHi = Traits::VariableSplit::create(Func, Spill, |
| 2284 Traits::VariableSplit::High); |
| 2285 } else { |
| 2286 SpillLo = loOperand(Src0RM); |
| 2287 SpillHi = hiOperand(Src0RM); |
| 2288 } |
| 2289 |
| 2290 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2291 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2292 Variable *T_Lo = makeReg(IceType_i32); |
| 2293 Variable *T_Hi = makeReg(IceType_i32); |
| 2294 |
| 2295 _mov(T_Lo, SpillLo); |
| 2296 _mov(DestLo, T_Lo); |
| 2297 _mov(T_Hi, SpillHi); |
| 2298 _mov(DestHi, T_Hi); |
| 2299 } |
| 2300 } break; |
| 2301 case IceType_f64: { |
| 2302 assert(Src0->getType() == IceType_i64); |
| 2303 if (Traits::Is64Bit) { |
| 2304 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2305 Variable *T = makeReg(IceType_f64); |
| 2306 // Movd requires its fp argument (in this case, the bitcast destination) |
| 2307 // to be an xmm register. |
| 2308 T->setWeightInfinite(); |
| 2309 _movd(T, Src0RM); |
| 2310 _mov(Dest, T); |
| 2311 } else { |
| 2312 Src0 = legalize(Src0); |
| 2313 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { |
| 2314 Variable *T = Func->makeVariable(Dest->getType()); |
| 2315 _movq(T, Src0); |
| 2316 _movq(Dest, T); |
| 2317 break; |
| 2318 } |
| 2319 // a.f64 = bitcast b.i64 ==> |
| 2320 // t_lo.i32 = b_lo.i32 |
| 2321 // FakeDef(s.f64) |
| 2322 // lo(s.f64) = t_lo.i32 |
| 2323 // t_hi.i32 = b_hi.i32 |
| 2324 // hi(s.f64) = t_hi.i32 |
| 2325 // a.f64 = s.f64 |
| 2218 typename Traits::SpillVariable *SpillVar = | 2326 typename Traits::SpillVariable *SpillVar = |
| 2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2327 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); |
| 2220 SpillVar->setLinkedTo(Src0Var); | 2328 SpillVar->setLinkedTo(Dest); |
| 2221 Variable *Spill = SpillVar; | 2329 Variable *Spill = SpillVar; |
| 2222 Spill->setWeight(RegWeight::Zero); | 2330 Spill->setWeight(RegWeight::Zero); |
| 2223 _movq(Spill, Src0RM); | 2331 |
| 2224 SpillLo = Traits::VariableSplit::create(Func, Spill, | 2332 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 2225 Traits::VariableSplit::Low); | 2333 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( |
| 2226 SpillHi = Traits::VariableSplit::create(Func, Spill, | 2334 Func, Spill, Traits::VariableSplit::Low); |
| 2227 Traits::VariableSplit::High); | 2335 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( |
| 2228 } else { | 2336 Func, Spill, Traits::VariableSplit::High); |
| 2229 SpillLo = loOperand(Src0RM); | 2337 _mov(T_Lo, loOperand(Src0)); |
| 2230 SpillHi = hiOperand(Src0RM); | 2338 // Technically, the Spill is defined after the _store happens, but |
| 2339 // SpillLo is considered a "use" of Spill, so define Spill before it |
| 2340 // is used. |
| 2341 Context.insert(InstFakeDef::create(Func, Spill)); |
| 2342 _store(T_Lo, SpillLo); |
| 2343 _mov(T_Hi, hiOperand(Src0)); |
| 2344 _store(T_Hi, SpillHi); |
| 2345 _movq(Dest, Spill); |
| 2231 } | 2346 } |
| 2232 | |
| 2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2235 Variable *T_Lo = makeReg(IceType_i32); | |
| 2236 Variable *T_Hi = makeReg(IceType_i32); | |
| 2237 | |
| 2238 _mov(T_Lo, SpillLo); | |
| 2239 _mov(DestLo, T_Lo); | |
| 2240 _mov(T_Hi, SpillHi); | |
| 2241 _mov(DestHi, T_Hi); | |
| 2242 } break; | |
| 2243 case IceType_f64: { | |
| 2244 Src0 = legalize(Src0); | |
| 2245 assert(Src0->getType() == IceType_i64); | |
| 2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | |
| 2247 Variable *T = Func->makeVariable(Dest->getType()); | |
| 2248 _movq(T, Src0); | |
| 2249 _movq(Dest, T); | |
| 2250 break; | |
| 2251 } | |
| 2252 // a.f64 = bitcast b.i64 ==> | |
| 2253 // t_lo.i32 = b_lo.i32 | |
| 2254 // FakeDef(s.f64) | |
| 2255 // lo(s.f64) = t_lo.i32 | |
| 2256 // t_hi.i32 = b_hi.i32 | |
| 2257 // hi(s.f64) = t_hi.i32 | |
| 2258 // a.f64 = s.f64 | |
| 2259 typename Traits::SpillVariable *SpillVar = | |
| 2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | |
| 2261 SpillVar->setLinkedTo(Dest); | |
| 2262 Variable *Spill = SpillVar; | |
| 2263 Spill->setWeight(RegWeight::Zero); | |
| 2264 | |
| 2265 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( | |
| 2267 Func, Spill, Traits::VariableSplit::Low); | |
| 2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( | |
| 2269 Func, Spill, Traits::VariableSplit::High); | |
| 2270 _mov(T_Lo, loOperand(Src0)); | |
| 2271 // Technically, the Spill is defined after the _store happens, but | |
| 2272 // SpillLo is considered a "use" of Spill so define Spill before it | |
| 2273 // is used. | |
| 2274 Context.insert(InstFakeDef::create(Func, Spill)); | |
| 2275 _store(T_Lo, SpillLo); | |
| 2276 _mov(T_Hi, hiOperand(Src0)); | |
| 2277 _store(T_Hi, SpillHi); | |
| 2278 _movq(Dest, Spill); | |
| 2279 } break; | 2347 } break; |
| 2280 case IceType_v8i1: { | 2348 case IceType_v8i1: { |
| 2281 assert(Src0->getType() == IceType_i8); | 2349 assert(Src0->getType() == IceType_i8); |
| 2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); | 2350 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); |
| 2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 2351 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 2284 // Arguments to functions are required to be at least 32 bits wide. | 2352 // Arguments to functions are required to be at least 32 bits wide. |
| 2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 2353 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
| 2286 Call->addArg(Src0AsI32); | 2354 Call->addArg(Src0AsI32); |
| 2287 lowerCall(Call); | 2355 lowerCall(Call); |
| 2288 } break; | 2356 } break; |
| (...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2676 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2609 _pxor(T, MinusOne); | 2677 _pxor(T, MinusOne); |
| 2610 } break; | 2678 } break; |
| 2611 } | 2679 } |
| 2612 | 2680 |
| 2613 _movp(Dest, T); | 2681 _movp(Dest, T); |
| 2614 eliminateNextVectorSextInstruction(Dest); | 2682 eliminateNextVectorSextInstruction(Dest); |
| 2615 return; | 2683 return; |
| 2616 } | 2684 } |
| 2617 | 2685 |
| 2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2686 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 2619 if (Src0->getType() == IceType_i64) { | 2687 lowerIcmp64(Inst); |
| 2620 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2621 size_t Index = static_cast<size_t>(Condition); | |
| 2622 assert(Index < Traits::TableIcmp64Size); | |
| 2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2627 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2628 Constant *One = Ctx->getConstantInt32(1); | |
| 2629 typename Traits::Insts::Label *LabelFalse = | |
| 2630 Traits::Insts::Label::create(Func, this); | |
| 2631 typename Traits::Insts::Label *LabelTrue = | |
| 2632 Traits::Insts::Label::create(Func, this); | |
| 2633 _mov(Dest, One); | |
| 2634 _cmp(Src0HiRM, Src1HiRI); | |
| 2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | |
| 2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | |
| 2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | |
| 2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | |
| 2639 _cmp(Src0LoRM, Src1LoRI); | |
| 2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | |
| 2641 Context.insert(LabelFalse); | |
| 2642 _mov_nonkillable(Dest, Zero); | |
| 2643 Context.insert(LabelTrue); | |
| 2644 return; | 2688 return; |
| 2645 } | 2689 } |
| 2646 | 2690 |
| 2647 // cmp b, c | 2691 // cmp b, c |
| 2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2692 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2649 _cmp(Src0RM, Src1); | 2693 _cmp(Src0RM, Src1); |
| 2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); | 2694 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); |
| 2651 } | 2695 } |
| 2652 | 2696 |
| 2697 template <typename Machine> |
| 2698 template <typename T> |
| 2699 typename std::enable_if<!T::Is64Bit, void>::type |
| 2700 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) { |
| 2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| 2702 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 2703 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 2704 Variable *Dest = Inst->getDest(); |
| 2705 InstIcmp::ICond Condition = Inst->getCondition(); |
| 2706 size_t Index = static_cast<size_t>(Condition); |
| 2707 assert(Index < Traits::TableIcmp64Size); |
| 2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
| 2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
| 2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 2712 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2713 Constant *One = Ctx->getConstantInt32(1); |
| 2714 typename Traits::Insts::Label *LabelFalse = |
| 2715 Traits::Insts::Label::create(Func, this); |
| 2716 typename Traits::Insts::Label *LabelTrue = |
| 2717 Traits::Insts::Label::create(Func, this); |
| 2718 _mov(Dest, One); |
| 2719 _cmp(Src0HiRM, Src1HiRI); |
| 2720 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 2721 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 2722 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 2723 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 2724 _cmp(Src0LoRM, Src1LoRI); |
| 2725 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 2726 Context.insert(LabelFalse); |
| 2727 _mov_nonkillable(Dest, Zero); |
| 2728 Context.insert(LabelTrue); |
| 2729 } |
| 2730 |
| 2653 template <class Machine> | 2731 template <class Machine> |
| 2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 2732 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| 2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2733 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 2734 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 2657 ConstantInteger32 *ElementIndex = | 2735 ConstantInteger32 *ElementIndex = |
| 2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 2736 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 2659 // Only constant indices are allowed in PNaCl IR. | 2737 // Only constant indices are allowed in PNaCl IR. |
| 2660 assert(ElementIndex); | 2738 assert(ElementIndex); |
| 2661 unsigned Index = ElementIndex->getValue(); | 2739 unsigned Index = ElementIndex->getValue(); |
| 2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 2740 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2841 } | 2919 } |
| 2842 case Intrinsics::AtomicLoad: { | 2920 case Intrinsics::AtomicLoad: { |
| 2843 // We require the memory address to be naturally aligned. | 2921 // We require the memory address to be naturally aligned. |
| 2844 // Given that is the case, then normal loads are atomic. | 2922 // Given that is the case, then normal loads are atomic. |
| 2845 if (!Intrinsics::isMemoryOrderValid( | 2923 if (!Intrinsics::isMemoryOrderValid( |
| 2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 2924 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| 2847 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2925 Func->setError("Unexpected memory ordering for AtomicLoad"); |
| 2848 return; | 2926 return; |
| 2849 } | 2927 } |
| 2850 Variable *Dest = Instr->getDest(); | 2928 Variable *Dest = Instr->getDest(); |
| 2851 if (Dest->getType() == IceType_i64) { | 2929 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2852 // Follow what GCC does and use a movq instead of what lowerLoad() | 2930 // Follow what GCC does and use a movq instead of what lowerLoad() |
| 2853 // normally does (split the load into two). | 2931 // normally does (split the load into two). |
| 2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | 2932 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding |
| 2855 // can't happen anyway, since this is x86-32 and integer arithmetic only | 2933 // can't happen anyway, since this is x86-32 and integer arithmetic only |
| 2856 // happens on 32-bit quantities. | 2934 // happens on 32-bit quantities. |
| 2857 Variable *T = makeReg(IceType_f64); | 2935 Variable *T = makeReg(IceType_f64); |
| 2858 typename Traits::X86OperandMem *Addr = | 2936 typename Traits::X86OperandMem *Addr = |
| 2859 formMemoryOperand(Instr->getArg(0), IceType_f64); | 2937 formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 2860 _movq(T, Addr); | 2938 _movq(T, Addr); |
| 2861 // Then cast the bits back out of the XMM register to the i64 Dest. | 2939 // Then cast the bits back out of the XMM register to the i64 Dest. |
| (...skipping 29 matching lines...) Expand all Loading... |
| 2891 if (!Intrinsics::isMemoryOrderValid( | 2969 if (!Intrinsics::isMemoryOrderValid( |
| 2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 2970 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 2893 Func->setError("Unexpected memory ordering for AtomicStore"); | 2971 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 2894 return; | 2972 return; |
| 2895 } | 2973 } |
| 2896 // We require the memory address to be naturally aligned. | 2974 // We require the memory address to be naturally aligned. |
| 2897 // Given that is the case, then normal stores are atomic. | 2975 // Given that is the case, then normal stores are atomic. |
| 2898 // Add a fence after the store to make it visible. | 2976 // Add a fence after the store to make it visible. |
| 2899 Operand *Value = Instr->getArg(0); | 2977 Operand *Value = Instr->getArg(0); |
| 2900 Operand *Ptr = Instr->getArg(1); | 2978 Operand *Ptr = Instr->getArg(1); |
| 2901 if (Value->getType() == IceType_i64) { | 2979 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
| 2902 // Use a movq instead of what lowerStore() normally does | 2980 // Use a movq instead of what lowerStore() normally does |
| 2903 // (split the store into two), following what GCC does. | 2981 // (split the store into two), following what GCC does. |
| 2904 // Cast the bits from int -> to an xmm register first. | 2982 // Cast the bits from int -> to an xmm register first. |
| 2905 Variable *T = makeReg(IceType_f64); | 2983 Variable *T = makeReg(IceType_f64); |
| 2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | 2984 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
| 2907 lowerCast(Cast); | 2985 lowerCast(Cast); |
| 2908 // Then store XMM w/ a movq. | 2986 // Then store XMM w/ a movq. |
| 2909 typename Traits::X86OperandMem *Addr = | 2987 typename Traits::X86OperandMem *Addr = |
| 2910 formMemoryOperand(Ptr, IceType_f64); | 2988 formMemoryOperand(Ptr, IceType_f64); |
| 2911 _storeq(T, Addr); | 2989 _storeq(T, Addr); |
| 2912 _mfence(); | 2990 _mfence(); |
| 2913 return; | 2991 return; |
| 2914 } | 2992 } |
| 2915 InstStore *Store = InstStore::create(Func, Value, Ptr); | 2993 InstStore *Store = InstStore::create(Func, Value, Ptr); |
| 2916 lowerStore(Store); | 2994 lowerStore(Store); |
| 2917 _mfence(); | 2995 _mfence(); |
| 2918 return; | 2996 return; |
| 2919 } | 2997 } |
| 2920 case Intrinsics::Bswap: { | 2998 case Intrinsics::Bswap: { |
| 2921 Variable *Dest = Instr->getDest(); | 2999 Variable *Dest = Instr->getDest(); |
| 2922 Operand *Val = Instr->getArg(0); | 3000 Operand *Val = Instr->getArg(0); |
| 2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the | 3001 // In 32-bit mode, bswap only works on 32-bit arguments, and the |
| 2924 // argument must be a register. Use rotate left for 16-bit bswap. | 3002 // argument must be a register. Use rotate left for 16-bit bswap. |
| 2925 if (Val->getType() == IceType_i64) { | 3003 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2926 Val = legalizeUndef(Val); | 3004 Val = legalizeUndef(Val); |
| 2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); | 3005 Variable *T_Lo = legalizeToReg(loOperand(Val)); |
| 2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); | 3006 Variable *T_Hi = legalizeToReg(hiOperand(Val)); |
| 2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3007 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3008 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2931 _bswap(T_Lo); | 3009 _bswap(T_Lo); |
| 2932 _bswap(T_Hi); | 3010 _bswap(T_Hi); |
| 2933 _mov(DestLo, T_Hi); | 3011 _mov(DestLo, T_Hi); |
| 2934 _mov(DestHi, T_Lo); | 3012 _mov(DestHi, T_Lo); |
| 2935 } else if (Val->getType() == IceType_i32) { | 3013 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) || |
| 3014 Val->getType() == IceType_i32) { |
| 2936 Variable *T = legalizeToReg(Val); | 3015 Variable *T = legalizeToReg(Val); |
| 2937 _bswap(T); | 3016 _bswap(T); |
| 2938 _mov(Dest, T); | 3017 _mov(Dest, T); |
| 2939 } else { | 3018 } else { |
| 2940 assert(Val->getType() == IceType_i16); | 3019 assert(Val->getType() == IceType_i16); |
| 2941 Constant *Eight = Ctx->getConstantInt16(8); | 3020 Constant *Eight = Ctx->getConstantInt16(8); |
| 2942 Variable *T = nullptr; | 3021 Variable *T = nullptr; |
| 2943 Val = legalize(Val); | 3022 Val = legalize(Val); |
| 2944 _mov(T, Val); | 3023 _mov(T, Val); |
| 2945 _rol(T, Eight); | 3024 _rol(T, Eight); |
| 2946 _mov(Dest, T); | 3025 _mov(Dest, T); |
| 2947 } | 3026 } |
| 2948 return; | 3027 return; |
| 2949 } | 3028 } |
| 2950 case Intrinsics::Ctpop: { | 3029 case Intrinsics::Ctpop: { |
| 2951 Variable *Dest = Instr->getDest(); | 3030 Variable *Dest = Instr->getDest(); |
| 3031 Variable *T = nullptr; |
| 2952 Operand *Val = Instr->getArg(0); | 3032 Operand *Val = Instr->getArg(0); |
| 2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 3033 Type ValTy = Val->getType(); |
| 2954 ? H_call_ctpop_i32 | 3034 assert(ValTy == IceType_i32 || ValTy == IceType_i64); |
| 2955 : H_call_ctpop_i64, | 3035 |
| 2956 Dest, 1); | 3036 if (!Traits::Is64Bit) { |
| 3037 T = Dest; |
| 3038 } else { |
| 3039 T = makeReg(IceType_i64); |
| 3040 if (ValTy == IceType_i32) { |
| 3041 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by |
| 3042 // converting it to a 64-bit value, and using ctpop_i64. _movzx should |
| 3043 // ensure we will not have any bits set on Val's upper 32 bits. |
| 3044 Variable *V = makeReg(IceType_i64); |
| 3045 _movzx(V, Val); |
| 3046 Val = V; |
| 3047 } |
| 3048 ValTy = IceType_i64; |
| 3049 } |
| 3050 |
| 3051 InstCall *Call = makeHelperCall( |
| 3052 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1); |
| 2957 Call->addArg(Val); | 3053 Call->addArg(Val); |
| 2958 lowerCall(Call); | 3054 lowerCall(Call); |
| 2959 // The popcount helpers always return 32-bit values, while the intrinsic's | 3055 // The popcount helpers always return 32-bit values, while the intrinsic's |
| 2960 // signature matches the native POPCNT instruction and fills a 64-bit reg | 3056 // signature matches the native POPCNT instruction and fills a 64-bit reg |
| 2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case | 3057 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case |
| 2962 // the user doesn't do that in the IR. If the user does that in the IR, | 3058 // the user doesn't do that in the IR. If the user does that in the IR, |
| 2963 // then this zero'ing instruction is dead and gets optimized out. | 3059 // then this zero'ing instruction is dead and gets optimized out. |
| 2964 if (Val->getType() == IceType_i64) { | 3060 if (!Traits::Is64Bit) { |
| 2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3061 assert(T == Dest); |
| 2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3062 if (Val->getType() == IceType_i64) { |
| 2967 _mov(DestHi, Zero); | 3063 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3064 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3065 _mov(DestHi, Zero); |
| 3066 } |
| 3067 } else { |
| 3068 assert(Val->getType() == IceType_i64); |
| 3069 // T is 64 bit. It needs to be copied to dest. We need to: |
| 3070 // |
| 3071 // T_1.32 = trunc T.64 to i32 |
| 3072 // T_2.64 = zext T_1.32 to i64 |
| 3073 // Dest.<<right_size>> = T_2.<<right_size>> |
| 3074 // |
| 3075 // which ensures the upper 32 bits will always be cleared. Just doing a |
| 3076 // |
| 3077 // mov Dest.32 = trunc T.32 to i32 |
| 3078 // |
| 3079 // is dangerous because there's a chance the compiler will optimize this |
| 3080 // copy out. To use _movzx we need two new registers (one 32-, and |
| 3081 // another 64-bit wide.) |
| 3082 Variable *T_1 = makeReg(IceType_i32); |
| 3083 _mov(T_1, T); |
| 3084 Variable *T_2 = makeReg(IceType_i64); |
| 3085 _movzx(T_2, T_1); |
| 3086 _mov(Dest, T_2); |
| 2968 } | 3087 } |
| 2969 return; | 3088 return; |
| 2970 } | 3089 } |
| 2971 case Intrinsics::Ctlz: { | 3090 case Intrinsics::Ctlz: { |
| 2972 // The "is zero undef" parameter is ignored and we always return | 3091 // The "is zero undef" parameter is ignored and we always return |
| 2973 // a well-defined value. | 3092 // a well-defined value. |
| 2974 Operand *Val = legalize(Instr->getArg(0)); | 3093 Operand *Val = legalize(Instr->getArg(0)); |
| 2975 Operand *FirstVal; | 3094 Operand *FirstVal; |
| 2976 Operand *SecondVal = nullptr; | 3095 Operand *SecondVal = nullptr; |
| 2977 if (Val->getType() == IceType_i64) { | 3096 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2978 FirstVal = loOperand(Val); | 3097 FirstVal = loOperand(Val); |
| 2979 SecondVal = hiOperand(Val); | 3098 SecondVal = hiOperand(Val); |
| 2980 } else { | 3099 } else { |
| 2981 FirstVal = Val; | 3100 FirstVal = Val; |
| 2982 } | 3101 } |
| 2983 const bool IsCttz = false; | 3102 const bool IsCttz = false; |
| 2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3103 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 2985 SecondVal); | 3104 SecondVal); |
| 2986 return; | 3105 return; |
| 2987 } | 3106 } |
| 2988 case Intrinsics::Cttz: { | 3107 case Intrinsics::Cttz: { |
| 2989 // The "is zero undef" parameter is ignored and we always return | 3108 // The "is zero undef" parameter is ignored and we always return |
| 2990 // a well-defined value. | 3109 // a well-defined value. |
| 2991 Operand *Val = legalize(Instr->getArg(0)); | 3110 Operand *Val = legalize(Instr->getArg(0)); |
| 2992 Operand *FirstVal; | 3111 Operand *FirstVal; |
| 2993 Operand *SecondVal = nullptr; | 3112 Operand *SecondVal = nullptr; |
| 2994 if (Val->getType() == IceType_i64) { | 3113 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2995 FirstVal = hiOperand(Val); | 3114 FirstVal = hiOperand(Val); |
| 2996 SecondVal = loOperand(Val); | 3115 SecondVal = loOperand(Val); |
| 2997 } else { | 3116 } else { |
| 2998 FirstVal = Val; | 3117 FirstVal = Val; |
| 2999 } | 3118 } |
| 3000 const bool IsCttz = true; | 3119 const bool IsCttz = true; |
| 3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3120 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 3002 SecondVal); | 3121 SecondVal); |
| 3003 return; | 3122 return; |
| 3004 } | 3123 } |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3098 Func->setError("Should not be lowering UnknownIntrinsic"); | 3217 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3099 return; | 3218 return; |
| 3100 } | 3219 } |
| 3101 return; | 3220 return; |
| 3102 } | 3221 } |
| 3103 | 3222 |
| 3104 template <class Machine> | 3223 template <class Machine> |
| 3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3224 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3106 Operand *Ptr, Operand *Expected, | 3225 Operand *Ptr, Operand *Expected, |
| 3107 Operand *Desired) { | 3226 Operand *Desired) { |
| 3108 if (Expected->getType() == IceType_i64) { | 3227 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { |
| 3109 // Reserve the pre-colored registers first, before adding any more | 3228 // Reserve the pre-colored registers first, before adding any more |
| 3110 // infinite-weight variables from formMemoryOperand's legalization. | 3229 // infinite-weight variables from formMemoryOperand's legalization. |
| 3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3230 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3231 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3232 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3233 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3115 _mov(T_eax, loOperand(Expected)); | 3234 _mov(T_eax, loOperand(Expected)); |
| 3116 _mov(T_edx, hiOperand(Expected)); | 3235 _mov(T_edx, hiOperand(Expected)); |
| 3117 _mov(T_ebx, loOperand(Desired)); | 3236 _mov(T_ebx, loOperand(Desired)); |
| 3118 _mov(T_ecx, hiOperand(Desired)); | 3237 _mov(T_ecx, hiOperand(Desired)); |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3335 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 3217 Operand *Ptr, Operand *Val) { | 3336 Operand *Ptr, Operand *Val) { |
| 3218 bool NeedsCmpxchg = false; | 3337 bool NeedsCmpxchg = false; |
| 3219 LowerBinOp Op_Lo = nullptr; | 3338 LowerBinOp Op_Lo = nullptr; |
| 3220 LowerBinOp Op_Hi = nullptr; | 3339 LowerBinOp Op_Hi = nullptr; |
| 3221 switch (Operation) { | 3340 switch (Operation) { |
| 3222 default: | 3341 default: |
| 3223 Func->setError("Unknown AtomicRMW operation"); | 3342 Func->setError("Unknown AtomicRMW operation"); |
| 3224 return; | 3343 return; |
| 3225 case Intrinsics::AtomicAdd: { | 3344 case Intrinsics::AtomicAdd: { |
| 3226 if (Dest->getType() == IceType_i64) { | 3345 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3227 // All the fall-through paths must set this to true, but use this | 3346 // All the fall-through paths must set this to true, but use this |
| 3228 // for asserting. | 3347 // for asserting. |
| 3229 NeedsCmpxchg = true; | 3348 NeedsCmpxchg = true; |
| 3230 Op_Lo = &TargetX86Base<Machine>::_add; | 3349 Op_Lo = &TargetX86Base<Machine>::_add; |
| 3231 Op_Hi = &TargetX86Base<Machine>::_adc; | 3350 Op_Hi = &TargetX86Base<Machine>::_adc; |
| 3232 break; | 3351 break; |
| 3233 } | 3352 } |
| 3234 typename Traits::X86OperandMem *Addr = | 3353 typename Traits::X86OperandMem *Addr = |
| 3235 formMemoryOperand(Ptr, Dest->getType()); | 3354 formMemoryOperand(Ptr, Dest->getType()); |
| 3236 const bool Locked = true; | 3355 const bool Locked = true; |
| 3237 Variable *T = nullptr; | 3356 Variable *T = nullptr; |
| 3238 _mov(T, Val); | 3357 _mov(T, Val); |
| 3239 _xadd(Addr, T, Locked); | 3358 _xadd(Addr, T, Locked); |
| 3240 _mov(Dest, T); | 3359 _mov(Dest, T); |
| 3241 return; | 3360 return; |
| 3242 } | 3361 } |
| 3243 case Intrinsics::AtomicSub: { | 3362 case Intrinsics::AtomicSub: { |
| 3244 if (Dest->getType() == IceType_i64) { | 3363 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3245 NeedsCmpxchg = true; | 3364 NeedsCmpxchg = true; |
| 3246 Op_Lo = &TargetX86Base<Machine>::_sub; | 3365 Op_Lo = &TargetX86Base<Machine>::_sub; |
| 3247 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3366 Op_Hi = &TargetX86Base<Machine>::_sbb; |
| 3248 break; | 3367 break; |
| 3249 } | 3368 } |
| 3250 typename Traits::X86OperandMem *Addr = | 3369 typename Traits::X86OperandMem *Addr = |
| 3251 formMemoryOperand(Ptr, Dest->getType()); | 3370 formMemoryOperand(Ptr, Dest->getType()); |
| 3252 const bool Locked = true; | 3371 const bool Locked = true; |
| 3253 Variable *T = nullptr; | 3372 Variable *T = nullptr; |
| 3254 _mov(T, Val); | 3373 _mov(T, Val); |
| (...skipping 16 matching lines...) Expand all Loading... |
| 3271 NeedsCmpxchg = true; | 3390 NeedsCmpxchg = true; |
| 3272 Op_Lo = &TargetX86Base<Machine>::_and; | 3391 Op_Lo = &TargetX86Base<Machine>::_and; |
| 3273 Op_Hi = &TargetX86Base<Machine>::_and; | 3392 Op_Hi = &TargetX86Base<Machine>::_and; |
| 3274 break; | 3393 break; |
| 3275 case Intrinsics::AtomicXor: | 3394 case Intrinsics::AtomicXor: |
| 3276 NeedsCmpxchg = true; | 3395 NeedsCmpxchg = true; |
| 3277 Op_Lo = &TargetX86Base<Machine>::_xor; | 3396 Op_Lo = &TargetX86Base<Machine>::_xor; |
| 3278 Op_Hi = &TargetX86Base<Machine>::_xor; | 3397 Op_Hi = &TargetX86Base<Machine>::_xor; |
| 3279 break; | 3398 break; |
| 3280 case Intrinsics::AtomicExchange: | 3399 case Intrinsics::AtomicExchange: |
| 3281 if (Dest->getType() == IceType_i64) { | 3400 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3282 NeedsCmpxchg = true; | 3401 NeedsCmpxchg = true; |
| 3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3402 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 3284 // just need to be moved to the ecx and ebx registers. | 3403 // just need to be moved to the ecx and ebx registers. |
| 3285 Op_Lo = nullptr; | 3404 Op_Lo = nullptr; |
| 3286 Op_Hi = nullptr; | 3405 Op_Hi = nullptr; |
| 3287 break; | 3406 break; |
| 3288 } | 3407 } |
| 3289 typename Traits::X86OperandMem *Addr = | 3408 typename Traits::X86OperandMem *Addr = |
| 3290 formMemoryOperand(Ptr, Dest->getType()); | 3409 formMemoryOperand(Ptr, Dest->getType()); |
| 3291 Variable *T = nullptr; | 3410 Variable *T = nullptr; |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3325 // .LABEL: | 3444 // .LABEL: |
| 3326 // mov <reg>, eax | 3445 // mov <reg>, eax |
| 3327 // op <reg>, [desired_adj] | 3446 // op <reg>, [desired_adj] |
| 3328 // lock cmpxchg [ptr], <reg> | 3447 // lock cmpxchg [ptr], <reg> |
| 3329 // jne .LABEL | 3448 // jne .LABEL |
| 3330 // mov <dest>, eax | 3449 // mov <dest>, eax |
| 3331 // | 3450 // |
| 3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. | 3451 // If Op_{Lo,Hi} are nullptr, then just copy the value. |
| 3333 Val = legalize(Val); | 3452 Val = legalize(Val); |
| 3334 Type Ty = Val->getType(); | 3453 Type Ty = Val->getType(); |
| 3335 if (Ty == IceType_i64) { | 3454 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3455 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3456 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3457 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3339 _mov(T_eax, loOperand(Addr)); | 3458 _mov(T_eax, loOperand(Addr)); |
| 3340 _mov(T_edx, hiOperand(Addr)); | 3459 _mov(T_edx, hiOperand(Addr)); |
| 3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3460 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3461 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3343 typename Traits::Insts::Label *Label = | 3462 typename Traits::Insts::Label *Label = |
| 3344 Traits::Insts::Label::create(Func, this); | 3463 Traits::Insts::Label::create(Func, this); |
| 3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | 3464 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3457 if (Cttz) { | 3576 if (Cttz) { |
| 3458 _mov(T_Dest, ThirtyTwo); | 3577 _mov(T_Dest, ThirtyTwo); |
| 3459 } else { | 3578 } else { |
| 3460 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3579 Constant *SixtyThree = Ctx->getConstantInt32(63); |
| 3461 _mov(T_Dest, SixtyThree); | 3580 _mov(T_Dest, SixtyThree); |
| 3462 } | 3581 } |
| 3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); | 3582 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
| 3464 if (!Cttz) { | 3583 if (!Cttz) { |
| 3465 _xor(T_Dest, ThirtyOne); | 3584 _xor(T_Dest, ThirtyOne); |
| 3466 } | 3585 } |
| 3467 if (Ty == IceType_i32) { | 3586 if (Traits::Is64Bit || Ty == IceType_i32) { |
| 3468 _mov(Dest, T_Dest); | 3587 _mov(Dest, T_Dest); |
| 3469 return; | 3588 return; |
| 3470 } | 3589 } |
| 3471 _add(T_Dest, ThirtyTwo); | 3590 _add(T_Dest, ThirtyTwo); |
| 3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3591 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3592 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3474 // Will be using "test" on this, so we need a registerized variable. | 3593 // Will be using "test" on this, so we need a registerized variable. |
| 3475 Variable *SecondVar = legalizeToReg(SecondVal); | 3594 Variable *SecondVar = legalizeToReg(SecondVal); |
| 3476 Variable *T_Dest2 = makeReg(IceType_i32); | 3595 Variable *T_Dest2 = makeReg(IceType_i32); |
| 3477 if (Cttz) { | 3596 if (Cttz) { |
| (...skipping 568 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4046 return; | 4165 return; |
| 4047 } | 4166 } |
| 4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4167 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| 4049 // But if SrcT is immediate, we might be able to do better, as | 4168 // But if SrcT is immediate, we might be able to do better, as |
| 4050 // the cmov instruction doesn't allow an immediate operand: | 4169 // the cmov instruction doesn't allow an immediate operand: |
| 4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4170 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
| 4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4171 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
| 4053 std::swap(SrcT, SrcF); | 4172 std::swap(SrcT, SrcF); |
| 4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4173 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); |
| 4055 } | 4174 } |
| 4056 if (DestTy == IceType_i64) { | 4175 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 4057 SrcT = legalizeUndef(SrcT); | 4176 SrcT = legalizeUndef(SrcT); |
| 4058 SrcF = legalizeUndef(SrcF); | 4177 SrcF = legalizeUndef(SrcF); |
| 4059 // Set the low portion. | 4178 // Set the low portion. |
| 4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4179 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4061 Variable *TLo = nullptr; | 4180 Variable *TLo = nullptr; |
| 4062 Operand *SrcFLo = legalize(loOperand(SrcF)); | 4181 Operand *SrcFLo = legalize(loOperand(SrcF)); |
| 4063 _mov(TLo, SrcFLo); | 4182 _mov(TLo, SrcFLo); |
| 4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); | 4183 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); |
| 4065 _cmov(TLo, SrcTLo, Cond); | 4184 _cmov(TLo, SrcTLo, Cond); |
| 4066 _mov(DestLo, TLo); | 4185 _mov(DestLo, TLo); |
| 4067 // Set the high portion. | 4186 // Set the high portion. |
| 4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4187 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4069 Variable *THi = nullptr; | 4188 Variable *THi = nullptr; |
| 4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); | 4189 Operand *SrcFHi = legalize(hiOperand(SrcF)); |
| 4071 _mov(THi, SrcFHi); | 4190 _mov(THi, SrcFHi); |
| 4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); | 4191 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); |
| 4073 _cmov(THi, SrcTHi, Cond); | 4192 _cmov(THi, SrcTHi, Cond); |
| 4074 _mov(DestHi, THi); | 4193 _mov(DestHi, THi); |
| 4075 return; | 4194 return; |
| 4076 } | 4195 } |
| 4077 | 4196 |
| 4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); | 4197 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
| 4198 (Traits::Is64Bit && DestTy == IceType_i64)); |
| 4079 Variable *T = nullptr; | 4199 Variable *T = nullptr; |
| 4080 SrcF = legalize(SrcF); | 4200 SrcF = legalize(SrcF); |
| 4081 _mov(T, SrcF); | 4201 _mov(T, SrcF); |
| 4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4202 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4083 _cmov(T, SrcT, Cond); | 4203 _cmov(T, SrcT, Cond); |
| 4084 _mov(Dest, T); | 4204 _mov(Dest, T); |
| 4085 } | 4205 } |
| 4086 | 4206 |
| 4087 template <class Machine> | 4207 template <class Machine> |
| 4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | 4208 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { |
| 4089 Operand *Value = Inst->getData(); | 4209 Operand *Value = Inst->getData(); |
| 4090 Operand *Addr = Inst->getAddr(); | 4210 Operand *Addr = Inst->getAddr(); |
| 4091 typename Traits::X86OperandMem *NewAddr = | 4211 typename Traits::X86OperandMem *NewAddr = |
| 4092 formMemoryOperand(Addr, Value->getType()); | 4212 formMemoryOperand(Addr, Value->getType()); |
| 4093 Type Ty = NewAddr->getType(); | 4213 Type Ty = NewAddr->getType(); |
| 4094 | 4214 |
| 4095 if (Ty == IceType_i64) { | 4215 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 4096 Value = legalizeUndef(Value); | 4216 Value = legalizeUndef(Value); |
| 4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4217 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
| 4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4218 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
| 4099 _store(ValueHi, | 4219 _store(ValueHi, |
| 4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); | 4220 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); |
| 4101 _store(ValueLo, | 4221 _store(ValueLo, |
| 4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); | 4222 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); |
| 4103 } else if (isVectorType(Ty)) { | 4223 } else if (isVectorType(Ty)) { |
| 4104 _storep(legalizeToReg(Value), NewAddr); | 4224 _storep(legalizeToReg(Value), NewAddr); |
| 4105 } else { | 4225 } else { |
| (...skipping 27 matching lines...) Expand all Loading... |
| 4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 4253 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 4134 Context.insert(NewStore); | 4254 Context.insert(NewStore); |
| 4135 } | 4255 } |
| 4136 } | 4256 } |
| 4137 | 4257 |
| 4138 template <class Machine> | 4258 template <class Machine> |
| 4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 4259 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, |
| 4140 uint64_t Min, uint64_t Max) { | 4260 uint64_t Min, uint64_t Max) { |
| 4141 // TODO(ascull): 64-bit should not reach here but only because it is not | 4261 // TODO(ascull): 64-bit should not reach here but only because it is not |
| 4142 // implemented yet. This should be able to handle the 64-bit case. | 4262 // implemented yet. This should be able to handle the 64-bit case. |
| 4143 assert(Comparison->getType() != IceType_i64); | 4263 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
| 4144 // Subtracting 0 is a nop so don't do it | 4264 // Subtracting 0 is a nop so don't do it |
| 4145 if (Min != 0) { | 4265 if (Min != 0) { |
| 4146 // Avoid clobbering the comparison by copying it | 4266 // Avoid clobbering the comparison by copying it |
| 4147 Variable *T = nullptr; | 4267 Variable *T = nullptr; |
| 4148 _mov(T, Comparison); | 4268 _mov(T, Comparison); |
| 4149 _sub(T, Ctx->getConstantInt32(Min)); | 4269 _sub(T, Ctx->getConstantInt32(Min)); |
| 4150 Comparison = T; | 4270 Comparison = T; |
| 4151 } | 4271 } |
| 4152 | 4272 |
| 4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 4273 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4232 | 4352 |
| 4233 template <class Machine> | 4353 template <class Machine> |
| 4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 4354 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { |
| 4235 // Group cases together and navigate through them with a binary search | 4355 // Group cases together and navigate through them with a binary search |
| 4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 4356 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 4237 Operand *Src0 = Inst->getComparison(); | 4357 Operand *Src0 = Inst->getComparison(); |
| 4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); | 4358 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 4239 | 4359 |
| 4240 assert(CaseClusters.size() != 0); // Should always be at least one | 4360 assert(CaseClusters.size() != 0); // Should always be at least one |
| 4241 | 4361 |
| 4242 if (Src0->getType() == IceType_i64) { | 4362 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 4243 Src0 = legalize(Src0); // get Base/Index into physical registers | 4363 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4244 Operand *Src0Lo = loOperand(Src0); | 4364 Operand *Src0Lo = loOperand(Src0); |
| 4245 Operand *Src0Hi = hiOperand(Src0); | 4365 Operand *Src0Hi = hiOperand(Src0); |
| 4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 4366 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 4247 // TODO(ascull): handle 64-bit case properly (currently naive version) | 4367 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 4248 // This might be handled by a higher level lowering of switches. | 4368 // This might be handled by a higher level lowering of switches. |
| 4249 SizeT NumCases = Inst->getNumCases(); | 4369 SizeT NumCases = Inst->getNumCases(); |
| 4250 if (NumCases >= 2) { | 4370 if (NumCases >= 2) { |
| 4251 Src0Lo = legalizeToReg(Src0Lo); | 4371 Src0Lo = legalizeToReg(Src0Lo); |
| 4252 Src0Hi = legalizeToReg(Src0Hi); | 4372 Src0Hi = legalizeToReg(Src0Hi); |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4437 // that follows. This means that the original Store instruction is | 4557 // that follows. This means that the original Store instruction is |
| 4438 // still there, either because the value being stored is used beyond | 4558 // still there, either because the value being stored is used beyond |
| 4439 // the Store instruction, or because dead code elimination did not | 4559 // the Store instruction, or because dead code elimination did not |
| 4440 // happen. In either case, we cancel RMW lowering (and the caller | 4560 // happen. In either case, we cancel RMW lowering (and the caller |
| 4441 // deletes the RMW instruction). | 4561 // deletes the RMW instruction). |
| 4442 if (!RMW->isLastUse(RMW->getBeacon())) | 4562 if (!RMW->isLastUse(RMW->getBeacon())) |
| 4443 return; | 4563 return; |
| 4444 Operand *Src = RMW->getData(); | 4564 Operand *Src = RMW->getData(); |
| 4445 Type Ty = Src->getType(); | 4565 Type Ty = Src->getType(); |
| 4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | 4566 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
| 4447 if (Ty == IceType_i64) { | 4567 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 4448 Src = legalizeUndef(Src); | 4568 Src = legalizeUndef(Src); |
| 4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | 4569 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); |
| 4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | 4570 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); |
| 4451 typename Traits::X86OperandMem *AddrLo = | 4571 typename Traits::X86OperandMem *AddrLo = |
| 4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); | 4572 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); |
| 4453 typename Traits::X86OperandMem *AddrHi = | 4573 typename Traits::X86OperandMem *AddrHi = |
| 4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); | 4574 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); |
| 4455 switch (RMW->getOp()) { | 4575 switch (RMW->getOp()) { |
| 4456 default: | 4576 default: |
| 4457 // TODO(stichnot): Implement other arithmetic operators. | 4577 // TODO(stichnot): Implement other arithmetic operators. |
| (...skipping 13 matching lines...) Expand all Loading... |
| 4471 case InstArithmetic::Or: | 4591 case InstArithmetic::Or: |
| 4472 _or_rmw(AddrLo, SrcLo); | 4592 _or_rmw(AddrLo, SrcLo); |
| 4473 _or_rmw(AddrHi, SrcHi); | 4593 _or_rmw(AddrHi, SrcHi); |
| 4474 return; | 4594 return; |
| 4475 case InstArithmetic::Xor: | 4595 case InstArithmetic::Xor: |
| 4476 _xor_rmw(AddrLo, SrcLo); | 4596 _xor_rmw(AddrLo, SrcLo); |
| 4477 _xor_rmw(AddrHi, SrcHi); | 4597 _xor_rmw(AddrHi, SrcHi); |
| 4478 return; | 4598 return; |
| 4479 } | 4599 } |
| 4480 } else { | 4600 } else { |
| 4481 // i8, i16, i32 | 4601 // x86-32: i8, i16, i32 |
| 4602 // x86-64: i8, i16, i32, i64 |
| 4482 switch (RMW->getOp()) { | 4603 switch (RMW->getOp()) { |
| 4483 default: | 4604 default: |
| 4484 // TODO(stichnot): Implement other arithmetic operators. | 4605 // TODO(stichnot): Implement other arithmetic operators. |
| 4485 break; | 4606 break; |
| 4486 case InstArithmetic::Add: | 4607 case InstArithmetic::Add: |
| 4487 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4608 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 4488 _add_rmw(Addr, Src); | 4609 _add_rmw(Addr, Src); |
| 4489 return; | 4610 return; |
| 4490 case InstArithmetic::Sub: | 4611 case InstArithmetic::Sub: |
| 4491 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4612 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 4516 } else { | 4637 } else { |
| 4517 TargetLowering::lowerOther(Instr); | 4638 TargetLowering::lowerOther(Instr); |
| 4518 } | 4639 } |
| 4519 } | 4640 } |
| 4520 | 4641 |
| 4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4642 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4522 /// preserve integrity of liveness analysis. Undef values are also | 4643 /// preserve integrity of liveness analysis. Undef values are also |
| 4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect | 4644 /// turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4524 /// Undef input. | 4645 /// Undef input. |
| 4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4646 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4526 // Pause constant blinding or pooling, blinding or pooling will be done later | 4647 if (Traits::Is64Bit) { |
| 4527 // during phi lowering assignments | 4648 // On x86-64 we don't need to prelower phis -- the architecture can handle |
| 4649 // 64-bit integers natively. |
| 4650 return; |
| 4651 } |
| 4652 |
| 4653 // Pause constant blinding or pooling, blinding or pooling will be done |
| 4654 // later during phi lowering assignments |
| 4528 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4655 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 4656 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( |
| 4530 this, Context.getNode(), Func); | 4657 this, Context.getNode(), Func); |
| 4531 } | 4658 } |
| 4532 | 4659 |
| 4533 // There is no support for loading or emitting vector constants, so the | 4660 // There is no support for loading or emitting vector constants, so the |
| 4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 4661 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| 4535 // etc. are initialized with register operations. | 4662 // etc. are initialized with register operations. |
| 4536 // | 4663 // |
| 4537 // TODO(wala): Add limited support for vector constants so that | 4664 // TODO(wala): Add limited support for vector constants so that |
| (...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { | 4805 if (auto *Const = llvm::dyn_cast<Constant>(From)) { |
| 4679 if (llvm::isa<ConstantUndef>(Const)) { | 4806 if (llvm::isa<ConstantUndef>(Const)) { |
| 4680 From = legalizeUndef(Const, RegNum); | 4807 From = legalizeUndef(Const, RegNum); |
| 4681 if (isVectorType(Ty)) | 4808 if (isVectorType(Ty)) |
| 4682 return From; | 4809 return From; |
| 4683 Const = llvm::cast<Constant>(From); | 4810 Const = llvm::cast<Constant>(From); |
| 4684 } | 4811 } |
| 4685 // There should be no constants of vector type (other than undef). | 4812 // There should be no constants of vector type (other than undef). |
| 4686 assert(!isVectorType(Ty)); | 4813 assert(!isVectorType(Ty)); |
| 4687 | 4814 |
| 4815 // If the operand is a 64 bit constant integer we need to legalize it to a |
| 4816 // register in x86-64. |
| 4817 if (Traits::Is64Bit) { |
| 4818 if (llvm::isa<ConstantInteger64>(Const)) { |
| 4819 Variable *V = copyToReg(Const, RegNum); |
| 4820 V->setWeightInfinite(); |
| 4821 return V; |
| 4822 } |
| 4823 } |
| 4824 |
| 4688 // If the operand is a 32 bit constant integer, we should check | 4825 // If the operand is a 32 bit constant integer, we should check |
| 4689 // whether we need to randomize it or pool it. | 4826 // whether we need to randomize it or pool it. |
| 4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { | 4827 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { |
| 4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); | 4828 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); |
| 4692 if (NewConst != Const) { | 4829 if (NewConst != Const) { |
| 4693 return NewConst; | 4830 return NewConst; |
| 4694 } | 4831 } |
| 4695 } | 4832 } |
| 4696 | 4833 |
| 4697 // Convert a scalar floating point constant into an explicit | 4834 // Convert a scalar floating point constant into an explicit |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4815 } | 4952 } |
| 4816 // Do legalization, which contains randomization/pooling | 4953 // Do legalization, which contains randomization/pooling |
| 4817 // or do randomization/pooling. | 4954 // or do randomization/pooling. |
| 4818 return llvm::cast<typename Traits::X86OperandMem>( | 4955 return llvm::cast<typename Traits::X86OperandMem>( |
| 4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 4956 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
| 4820 } | 4957 } |
| 4821 | 4958 |
| 4822 template <class Machine> | 4959 template <class Machine> |
| 4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | 4960 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { |
| 4824 // There aren't any 64-bit integer registers for x86-32. | 4961 // There aren't any 64-bit integer registers for x86-32. |
| 4825 assert(Type != IceType_i64); | 4962 assert(Traits::Is64Bit || Type != IceType_i64); |
| 4826 Variable *Reg = Func->makeVariable(Type); | 4963 Variable *Reg = Func->makeVariable(Type); |
| 4827 if (RegNum == Variable::NoRegister) | 4964 if (RegNum == Variable::NoRegister) |
| 4828 Reg->setWeightInfinite(); | 4965 Reg->setWeightInfinite(); |
| 4829 else | 4966 else |
| 4830 Reg->setRegNum(RegNum); | 4967 Reg->setRegNum(RegNum); |
| 4831 return Reg; | 4968 return Reg; |
| 4832 } | 4969 } |
| 4833 | 4970 |
| 4834 template <class Machine> void TargetX86Base<Machine>::postLower() { | 4971 template <class Machine> void TargetX86Base<Machine>::postLower() { |
| 4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 4972 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| (...skipping 11 matching lines...) Expand all Loading... |
| 4847 | 4984 |
| 4848 template <class Machine> | 4985 template <class Machine> |
| 4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | 4986 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { |
| 4850 if (!BuildDefs::dump()) | 4987 if (!BuildDefs::dump()) |
| 4851 return; | 4988 return; |
| 4852 Ostream &Str = Ctx->getStrEmit(); | 4989 Ostream &Str = Ctx->getStrEmit(); |
| 4853 Str << getConstantPrefix() << C->getValue(); | 4990 Str << getConstantPrefix() << C->getValue(); |
| 4854 } | 4991 } |
| 4855 | 4992 |
| 4856 template <class Machine> | 4993 template <class Machine> |
| 4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { | 4994 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const { |
| 4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 4995 if (!Traits::Is64Bit) { |
| 4996 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); |
| 4997 } else { |
| 4998 if (!BuildDefs::dump()) |
| 4999 return; |
| 5000 Ostream &Str = Ctx->getStrEmit(); |
| 5001 Str << getConstantPrefix() << C->getValue(); |
| 5002 } |
| 4859 } | 5003 } |
| 4860 | 5004 |
| 4861 template <class Machine> | 5005 template <class Machine> |
| 4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | 5006 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { |
| 4863 if (!BuildDefs::dump()) | 5007 if (!BuildDefs::dump()) |
| 4864 return; | 5008 return; |
| 4865 Ostream &Str = Ctx->getStrEmit(); | 5009 Ostream &Str = Ctx->getStrEmit(); |
| 4866 C->emitPoolLabel(Str); | 5010 C->emitPoolLabel(Str); |
| 4867 } | 5011 } |
| 4868 | 5012 |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4993 Constant *Mask1 = Ctx->getConstantInt( | 5137 Constant *Mask1 = Ctx->getConstantInt( |
| 4994 MemOperand->getOffset()->getType(), Cookie + Value); | 5138 MemOperand->getOffset()->getType(), Cookie + Value); |
| 4995 Constant *Mask2 = | 5139 Constant *Mask2 = |
| 4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 5140 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
| 4997 | 5141 |
| 4998 typename Traits::X86OperandMem *TempMemOperand = | 5142 typename Traits::X86OperandMem *TempMemOperand = |
| 4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), | 5143 Traits::X86OperandMem::create(Func, MemOperand->getType(), |
| 5000 MemOperand->getBase(), Mask1); | 5144 MemOperand->getBase(), Mask1); |
| 5001 // If we have already assigned a physical register, we must come from | 5145 // If we have already assigned a physical register, we must come from |
| 5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5146 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
| 5003 // the assigned register as this assignment is the start of its use-def | 5147 // the assigned register as this assignment is the start of its |
| 5004 // chain. So we add RegNum argument here. | 5148 // use-def chain. So we add RegNum argument here. |
| 5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 5149 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
| 5006 _lea(RegTemp, TempMemOperand); | 5150 _lea(RegTemp, TempMemOperand); |
| 5007 // As source operand doesn't use the dstreg, we don't need to add | 5151 // As source operand doesn't use the dstreg, we don't need to add |
| 5008 // _set_dest_nonkillable(). | 5152 // _set_dest_nonkillable(). |
| 5009 // But if we use the same Dest Reg, that is, with RegNum | 5153 // But if we use the same Dest Reg, that is, with RegNum |
| 5010 // assigned, we should add this _set_dest_nonkillable() | 5154 // assigned, we should add this _set_dest_nonkillable() |
| 5011 if (RegNum != Variable::NoRegister) | 5155 if (RegNum != Variable::NoRegister) |
| 5012 _set_dest_nonkillable(); | 5156 _set_dest_nonkillable(); |
| 5013 | 5157 |
| 5014 typename Traits::X86OperandMem *NewMemOperand = | 5158 typename Traits::X86OperandMem *NewMemOperand = |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5077 } | 5221 } |
| 5078 // If the offset is not eligible for blinding or pooling, return the original | 5222 // If the offset is not eligible for blinding or pooling, return the original |
| 5079 // mem operand. | 5223 // mem operand. |
| 5080 return MemOperand; | 5224 return MemOperand; |
| 5081 } | 5225 } |
| 5082 | 5226 |
| 5083 } // end of namespace X86Internal | 5227 } // end of namespace X86Internal |
| 5084 } // end of namespace Ice | 5228 } // end of namespace Ice |
| 5085 | 5229 |
| 5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5230 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |