Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 70 // NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 71 // basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 72 // then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 73 uint32_t NumUses = 0; |
| 74 }; | 74 }; |
| 75 | 75 |
| 76 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
| 77 public: | 77 public: |
| 78 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
| 79 PK_None, | 79 PK_None, |
| 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | |
| 80 PK_Icmp32, | 81 PK_Icmp32, |
| 81 PK_Icmp64, | 82 PK_Icmp64, |
| 82 PK_Fcmp, | 83 PK_Fcmp, |
| 83 PK_Trunc | 84 PK_Trunc |
| 84 }; | 85 }; |
| 85 | 86 |
| 86 /// Currently the actual enum values are not used (other than CK_None), but we | 87 /// Currently the actual enum values are not used (other than CK_None), but we |
| 87 /// go ahead and produce them anyway for symmetry with the | 88 /// go ahead and produce them anyway for symmetry with the |
| 88 /// BoolFoldingProducerKind. | 89 /// BoolFoldingProducerKind. |
| 89 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 113 }; | 114 }; |
| 114 | 115 |
| 115 template <class MachineTraits> | 116 template <class MachineTraits> |
| 116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) |
| 117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
| 118 | 119 |
| 119 template <class MachineTraits> | 120 template <class MachineTraits> |
| 120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
| 122 if (llvm::isa<InstIcmp>(Instr)) { | 123 if (llvm::isa<InstIcmp>(Instr)) { |
| 123 if (Instr->getSrc(0)->getType() != IceType_i64) | 124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) |
| 124 return PK_Icmp32; | 125 return PK_Icmp32; |
| 125 return PK_None; // TODO(stichnot): actually PK_Icmp64; | 126 return PK_None; // TODO(stichnot): actually PK_Icmp64; |
| 126 } | 127 } |
| 127 return PK_None; // TODO(stichnot): remove this | 128 return PK_None; // TODO(stichnot): remove this |
| 128 | 129 |
| 129 if (llvm::isa<InstFcmp>(Instr)) | 130 if (llvm::isa<InstFcmp>(Instr)) |
| 130 return PK_Fcmp; | 131 return PK_Fcmp; |
| 131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
| 132 switch (Cast->getCastKind()) { | 133 switch (Cast->getCastKind()) { |
| 133 default: | 134 default: |
| (...skipping 502 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 636 // instruction or equivalent. | 637 // instruction or equivalent. |
| 637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 638 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| 638 // An InstLoad always qualifies. | 639 // An InstLoad always qualifies. |
| 639 LoadDest = Load->getDest(); | 640 LoadDest = Load->getDest(); |
| 640 const bool DoLegalize = false; | 641 const bool DoLegalize = false; |
| 641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 642 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
| 642 LoadDest->getType(), DoLegalize); | 643 LoadDest->getType(), DoLegalize); |
| 643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 644 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
| 644 // An AtomicLoad intrinsic qualifies as long as it has a valid | 645 // An AtomicLoad intrinsic qualifies as long as it has a valid |
| 645 // memory ordering, and can be implemented in a single | 646 // memory ordering, and can be implemented in a single |
| 646 // instruction (i.e., not i64). | 647 // instruction (i.e., not i64 on x86-32). |
| 647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 648 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
| 648 if (ID == Intrinsics::AtomicLoad && | 649 if (ID == Intrinsics::AtomicLoad && |
| 649 Intrin->getDest()->getType() != IceType_i64 && | 650 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
| 650 Intrinsics::isMemoryOrderValid( | 651 Intrinsics::isMemoryOrderValid( |
| 651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 652 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
| 652 LoadDest = Intrin->getDest(); | 653 LoadDest = Intrin->getDest(); |
| 653 const bool DoLegalize = false; | 654 const bool DoLegalize = false; |
| 654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 655 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
| 655 DoLegalize); | 656 DoLegalize); |
| 656 } | 657 } |
| 657 } | 658 } |
| 658 // A Load instruction can be folded into the following | 659 // A Load instruction can be folded into the following |
| 659 // instruction only if the following instruction ends the Load's | 660 // instruction only if the following instruction ends the Load's |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 717 template <class Machine> | 718 template <class Machine> |
| 718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 719 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 720 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { |
| 720 return Br->optimizeBranch(NextNode); | 721 return Br->optimizeBranch(NextNode); |
| 721 } | 722 } |
| 722 return false; | 723 return false; |
| 723 } | 724 } |
| 724 | 725 |
| 725 template <class Machine> | 726 template <class Machine> |
| 726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 727 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| 728 // Special case: never allow partial reads/writes to/from %rBP and %rSP. | |
| 729 if (RegNum == Traits::RegisterSet::Reg_esp || | |
| 730 RegNum == Traits::RegisterSet::Reg_ebp) | |
| 731 Ty = Traits::WordType; | |
| 727 if (Ty == IceType_void) | 732 if (Ty == IceType_void) |
| 728 Ty = IceType_i32; | 733 Ty = IceType_i32; |
| 729 if (PhysicalRegisters[Ty].empty()) | 734 if (PhysicalRegisters[Ty].empty()) |
| 730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 735 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
| 731 assert(RegNum < PhysicalRegisters[Ty].size()); | 736 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 737 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 733 if (Reg == nullptr) { | 738 if (Reg == nullptr) { |
| 734 Reg = Func->makeVariable(Ty); | 739 Reg = Func->makeVariable(Ty); |
| 735 Reg->setRegNum(RegNum); | 740 Reg->setRegNum(RegNum); |
| 736 PhysicalRegisters[Ty][RegNum] = Reg; | 741 PhysicalRegisters[Ty][RegNum] = Reg; |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 763 } | 768 } |
| 764 int32_t Offset = Var->getStackOffset(); | 769 int32_t Offset = Var->getStackOffset(); |
| 765 int32_t BaseRegNum = Var->getBaseRegNum(); | 770 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 766 if (BaseRegNum == Variable::NoRegister) { | 771 if (BaseRegNum == Variable::NoRegister) { |
| 767 BaseRegNum = getFrameOrStackReg(); | 772 BaseRegNum = getFrameOrStackReg(); |
| 768 if (!hasFramePointer()) | 773 if (!hasFramePointer()) |
| 769 Offset += getStackAdjustment(); | 774 Offset += getStackAdjustment(); |
| 770 } | 775 } |
| 771 if (Offset) | 776 if (Offset) |
| 772 Str << Offset; | 777 Str << Offset; |
| 773 const Type FrameSPTy = IceType_i32; | 778 const Type FrameSPTy = Traits::WordType; |
| 774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; | 779 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; |
| 775 } | 780 } |
| 776 | 781 |
| 777 template <class Machine> | 782 template <class Machine> |
| 778 typename TargetX86Base<Machine>::Traits::Address | 783 typename TargetX86Base<Machine>::Traits::Address |
| 779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 784 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { |
| 780 if (Var->hasReg()) | 785 if (Var->hasReg()) |
| 781 llvm_unreachable("Stack Variable has a register assigned"); | 786 llvm_unreachable("Stack Variable has a register assigned"); |
| 782 if (Var->getWeight().isInf()) { | 787 if (Var->getWeight().isInf()) { |
| 783 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 788 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 803 /// function generates an instruction to copy Arg into its assigned | 808 /// function generates an instruction to copy Arg into its assigned |
| 804 /// register if applicable. | 809 /// register if applicable. |
| 805 template <class Machine> | 810 template <class Machine> |
| 806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 811 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 807 Variable *FramePtr, | 812 Variable *FramePtr, |
| 808 size_t BasicFrameOffset, | 813 size_t BasicFrameOffset, |
| 809 size_t &InArgsSizeBytes) { | 814 size_t &InArgsSizeBytes) { |
| 810 Variable *Lo = Arg->getLo(); | 815 Variable *Lo = Arg->getLo(); |
| 811 Variable *Hi = Arg->getHi(); | 816 Variable *Hi = Arg->getHi(); |
| 812 Type Ty = Arg->getType(); | 817 Type Ty = Arg->getType(); |
| 813 if (Lo && Hi && Ty == IceType_i64) { | 818 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { |
| 814 // TODO(jpp): This special case is not needed for x86-64. | |
| 815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 819 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 820 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 821 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 822 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 819 return; | 823 return; |
| 820 } | 824 } |
| 821 if (isVectorType(Ty)) { | 825 if (isVectorType(Ty)) { |
| 822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 826 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 823 } | 827 } |
| 824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 828 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 829 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 826 if (Arg->hasReg()) { | 830 if (Arg->hasReg()) { |
| 827 assert(Ty != IceType_i64); | 831 assert(Ty != IceType_i64 || Traits::Is64Bit); |
| 828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 832 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
| 829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 833 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
| 830 if (isVectorType(Arg->getType())) { | 834 if (isVectorType(Arg->getType())) { |
| 831 _movp(Arg, Mem); | 835 _movp(Arg, Mem); |
| 832 } else { | 836 } else { |
| 833 _mov(Arg, Mem); | 837 _mov(Arg, Mem); |
| 834 } | 838 } |
| 835 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 839 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
| 836 // operand instead of a Variable, so its fill-from-stack operation has to be | 840 // operand instead of a Variable, so its fill-from-stack operation has to be |
| 837 // tracked separately for statistics. | 841 // tracked separately for statistics. |
| 838 Ctx->statsUpdateFills(); | 842 Ctx->statsUpdateFills(); |
| 839 } | 843 } |
| 840 } | 844 } |
| 841 | 845 |
| 842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 846 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 843 // TODO(jpp): this is wrong for x86-64. | 847 return Traits::WordType; |
| 844 return IceType_i32; | |
| 845 } | 848 } |
| 846 | 849 |
| 847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 850 template <class Machine> |
| 851 template <typename T> | |
| 852 typename std::enable_if<!T::Is64Bit, void>::type | |
| 853 TargetX86Base<Machine>::split64(Variable *Var) { | |
| 848 switch (Var->getType()) { | 854 switch (Var->getType()) { |
| 849 default: | 855 default: |
| 850 return; | 856 return; |
| 851 case IceType_i64: | 857 case IceType_i64: |
| 852 // TODO: Only consider F64 if we need to push each half when | 858 // TODO: Only consider F64 if we need to push each half when |
| 853 // passing as an argument to a function call. Note that each half | 859 // passing as an argument to a function call. Note that each half |
| 854 // is still typed as I32. | 860 // is still typed as I32. |
| 855 case IceType_f64: | 861 case IceType_f64: |
| 856 break; | 862 break; |
| 857 } | 863 } |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 869 Hi->setName(Func, Var->getName(Func) + "__hi"); | 875 Hi->setName(Func, Var->getName(Func) + "__hi"); |
| 870 } | 876 } |
| 871 Var->setLoHi(Lo, Hi); | 877 Var->setLoHi(Lo, Hi); |
| 872 if (Var->getIsArg()) { | 878 if (Var->getIsArg()) { |
| 873 Lo->setIsArg(); | 879 Lo->setIsArg(); |
| 874 Hi->setIsArg(); | 880 Hi->setIsArg(); |
| 875 } | 881 } |
| 876 } | 882 } |
| 877 | 883 |
| 878 template <class Machine> | 884 template <class Machine> |
| 879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { | 885 template <typename T> |
| 886 typename std::enable_if<!T::Is64Bit, Operand>::type * | |
| 887 TargetX86Base<Machine>::loOperand(Operand *Operand) { | |
| 880 assert(Operand->getType() == IceType_i64 || | 888 assert(Operand->getType() == IceType_i64 || |
| 881 Operand->getType() == IceType_f64); | 889 Operand->getType() == IceType_f64); |
| 882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 890 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 883 return Operand; | 891 return Operand; |
| 884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 892 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 885 split64(Var); | 893 split64(Var); |
| 886 return Var->getLo(); | 894 return Var->getLo(); |
| 887 } | 895 } |
| 888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 896 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 897 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 898 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 891 // Check if we need to blind/pool the constant. | 899 // Check if we need to blind/pool the constant. |
| 892 return legalize(ConstInt); | 900 return legalize(ConstInt); |
| 893 } | 901 } |
| 894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 902 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
| 895 auto *MemOperand = Traits::X86OperandMem::create( | 903 auto *MemOperand = Traits::X86OperandMem::create( |
| 896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 904 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 897 Mem->getShift(), Mem->getSegmentRegister()); | 905 Mem->getShift(), Mem->getSegmentRegister()); |
| 898 // Test if we should randomize or pool the offset, if so randomize it or | 906 // Test if we should randomize or pool the offset, if so randomize it or |
| 899 // pool it then create mem operand with the blinded/pooled constant. | 907 // pool it then create mem operand with the blinded/pooled constant. |
| 900 // Otherwise, return the mem operand as ordinary mem operand. | 908 // Otherwise, return the mem operand as ordinary mem operand. |
| 901 return legalize(MemOperand); | 909 return legalize(MemOperand); |
| 902 } | 910 } |
| 903 llvm_unreachable("Unsupported operand type"); | 911 llvm_unreachable("Unsupported operand type"); |
| 904 return nullptr; | 912 return nullptr; |
| 905 } | 913 } |
| 906 | 914 |
| 907 template <class Machine> | 915 template <class Machine> |
| 908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 916 template <typename T> |
| 917 typename std::enable_if<!T::Is64Bit, Operand>::type * | |
| 918 TargetX86Base<Machine>::hiOperand(Operand *Operand) { | |
| 909 assert(Operand->getType() == IceType_i64 || | 919 assert(Operand->getType() == IceType_i64 || |
| 910 Operand->getType() == IceType_f64); | 920 Operand->getType() == IceType_f64); |
| 911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 921 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 912 return Operand; | 922 return Operand; |
| 913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 923 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
| 914 split64(Var); | 924 split64(Var); |
| 915 return Var->getHi(); | 925 return Var->getHi(); |
| 916 } | 926 } |
| 917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 927 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 928 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1110 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| 1101 Variable *Dest = Inst->getDest(); | 1111 Variable *Dest = Inst->getDest(); |
| 1102 Operand *Src0 = legalize(Inst->getSrc(0)); | 1112 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1103 Operand *Src1 = legalize(Inst->getSrc(1)); | 1113 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1104 if (Inst->isCommutative()) { | 1114 if (Inst->isCommutative()) { |
| 1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1115 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
| 1106 std::swap(Src0, Src1); | 1116 std::swap(Src0, Src1); |
| 1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | 1117 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
| 1108 std::swap(Src0, Src1); | 1118 std::swap(Src0, Src1); |
| 1109 } | 1119 } |
| 1110 if (Dest->getType() == IceType_i64) { | 1120 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1111 // These helper-call-involved instructions are lowered in this | 1121 // These x86-32 helper-call-involved instructions are lowered in this |
| 1112 // separate switch. This is because loOperand() and hiOperand() | 1122 // separate switch. This is because loOperand() and hiOperand() |
| 1113 // may insert redundant instructions for constant blinding and | 1123 // may insert redundant instructions for constant blinding and |
| 1114 // pooling. Such redundant instructions will fail liveness analysis | 1124 // pooling. Such redundant instructions will fail liveness analysis |
| 1115 // under -Om1 setting. And, actually these arguments do not need | 1125 // under -Om1 setting. And, actually these arguments do not need |
| 1116 // to be processed with loOperand() and hiOperand() to be used. | 1126 // to be processed with loOperand() and hiOperand() to be used. |
| 1117 switch (Inst->getOp()) { | 1127 switch (Inst->getOp()) { |
| 1118 case InstArithmetic::Udiv: { | 1128 case InstArithmetic::Udiv: { |
| 1119 const SizeT MaxSrcs = 2; | 1129 const SizeT MaxSrcs = 2; |
| 1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1130 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
| 1121 Call->addArg(Inst->getSrc(0)); | 1131 Call->addArg(Inst->getSrc(0)); |
| (...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1659 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1650 // this ever becomes a problem we can introduce a pseudo rem instruction | 1660 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1651 // that returns the remainder in %al directly (and uses a mov for copying | 1661 // that returns the remainder in %al directly (and uses a mov for copying |
| 1652 // %ah to %al.) | 1662 // %ah to %al.) |
| 1653 static constexpr uint8_t AlSizeInBits = 8; | 1663 static constexpr uint8_t AlSizeInBits = 8; |
| 1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1664 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1655 _mov(Dest, T); | 1665 _mov(Dest, T); |
| 1656 Context.insert(InstFakeUse::create(Func, T_eax)); | 1666 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1657 } else { | 1667 } else { |
| 1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1668 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1669 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); |
| 1670 _mov(T_edx, Zero); | |
| 1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1671 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1661 _div(T_edx, Src1, T); | 1672 _div(T_edx, Src1, T); |
| 1662 _mov(Dest, T_edx); | 1673 _mov(Dest, T_edx); |
| 1663 } | 1674 } |
| 1664 break; | 1675 break; |
| 1665 case InstArithmetic::Srem: | 1676 case InstArithmetic::Srem: |
| 1666 // TODO(stichnot): Enable this after doing better performance | 1677 // TODO(stichnot): Enable this after doing better performance |
| 1667 // and cross testing. | 1678 // and cross testing. |
| 1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1679 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1669 // Optimize mod by constant power of 2, but not for Om1 or O0, | 1680 // Optimize mod by constant power of 2, but not for Om1 or O0, |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1725 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't |
| 1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1726 // mov %ah, %al because it would make x86-64 codegen more complicated. If |
| 1716 // this ever becomes a problem we can introduce a pseudo rem instruction | 1727 // this ever becomes a problem we can introduce a pseudo rem instruction |
| 1717 // that returns the remainder in %al directly (and uses a mov for copying | 1728 // that returns the remainder in %al directly (and uses a mov for copying |
| 1718 // %ah to %al.) | 1729 // %ah to %al.) |
| 1719 static constexpr uint8_t AlSizeInBits = 8; | 1730 static constexpr uint8_t AlSizeInBits = 8; |
| 1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1731 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); |
| 1721 _mov(Dest, T); | 1732 _mov(Dest, T); |
| 1722 Context.insert(InstFakeUse::create(Func, T_eax)); | 1733 Context.insert(InstFakeUse::create(Func, T_eax)); |
| 1723 } else { | 1734 } else { |
| 1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 1735 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); |
| 1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1726 _cbwdq(T_edx, T); | 1737 _cbwdq(T_edx, T); |
| 1727 _idiv(T_edx, Src1, T); | 1738 _idiv(T_edx, Src1, T); |
| 1728 _mov(Dest, T_edx); | 1739 _mov(Dest, T_edx); |
| 1729 } | 1740 } |
| 1730 break; | 1741 break; |
| 1731 case InstArithmetic::Fadd: | 1742 case InstArithmetic::Fadd: |
| 1732 _mov(T, Src0); | 1743 _mov(T, Src0); |
| 1733 _addss(T, Src1); | 1744 _addss(T, Src1); |
| 1734 _mov(Dest, T); | 1745 _mov(Dest, T); |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 1758 return lowerCall(Call); | 1769 return lowerCall(Call); |
| 1759 } | 1770 } |
| 1760 } | 1771 } |
| 1761 } | 1772 } |
| 1762 | 1773 |
| 1763 template <class Machine> | 1774 template <class Machine> |
| 1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { | 1775 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| 1765 Variable *Dest = Inst->getDest(); | 1776 Variable *Dest = Inst->getDest(); |
| 1766 Operand *Src0 = Inst->getSrc(0); | 1777 Operand *Src0 = Inst->getSrc(0); |
| 1767 assert(Dest->getType() == Src0->getType()); | 1778 assert(Dest->getType() == Src0->getType()); |
| 1768 if (Dest->getType() == IceType_i64) { | 1779 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1769 Src0 = legalize(Src0); | 1780 Src0 = legalize(Src0); |
| 1770 Operand *Src0Lo = loOperand(Src0); | 1781 Operand *Src0Lo = loOperand(Src0); |
| 1771 Operand *Src0Hi = hiOperand(Src0); | 1782 Operand *Src0Hi = hiOperand(Src0); |
| 1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1783 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1784 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1785 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 1775 _mov(T_Lo, Src0Lo); | 1786 _mov(T_Lo, Src0Lo); |
| 1776 _mov(DestLo, T_Lo); | 1787 _mov(DestLo, T_Lo); |
| 1777 _mov(T_Hi, Src0Hi); | 1788 _mov(T_Hi, Src0Hi); |
| 1778 _mov(DestHi, T_Hi); | 1789 _mov(DestHi, T_Hi); |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1863 SizeT ShiftAmount = | 1874 SizeT ShiftAmount = |
| 1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 1875 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 1865 1; | 1876 1; |
| 1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 1877 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 1867 Variable *T = makeReg(DestTy); | 1878 Variable *T = makeReg(DestTy); |
| 1868 _movp(T, Src0RM); | 1879 _movp(T, Src0RM); |
| 1869 _psll(T, ShiftConstant); | 1880 _psll(T, ShiftConstant); |
| 1870 _psra(T, ShiftConstant); | 1881 _psra(T, ShiftConstant); |
| 1871 _movp(Dest, T); | 1882 _movp(Dest, T); |
| 1872 } | 1883 } |
| 1873 } else if (Dest->getType() == IceType_i64) { | 1884 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 1885 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
| 1875 Constant *Shift = Ctx->getConstantInt32(31); | 1886 Constant *Shift = Ctx->getConstantInt32(31); |
| 1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1887 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1888 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1878 Variable *T_Lo = makeReg(DestLo->getType()); | 1889 Variable *T_Lo = makeReg(DestLo->getType()); |
| 1879 if (Src0RM->getType() == IceType_i32) { | 1890 if (Src0RM->getType() == IceType_i32) { |
| 1880 _mov(T_Lo, Src0RM); | 1891 _mov(T_Lo, Src0RM); |
| 1881 } else if (Src0RM->getType() == IceType_i1) { | 1892 } else if (Src0RM->getType() == IceType_i1) { |
| 1882 _movzx(T_Lo, Src0RM); | 1893 _movzx(T_Lo, Src0RM); |
| 1883 _shl(T_Lo, Shift); | 1894 _shl(T_Lo, Shift); |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1923 case InstCast::Zext: { | 1934 case InstCast::Zext: { |
| 1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1935 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 1925 if (isVectorType(Dest->getType())) { | 1936 if (isVectorType(Dest->getType())) { |
| 1926 // onemask = materialize(1,1,...); dest = onemask & src | 1937 // onemask = materialize(1,1,...); dest = onemask & src |
| 1927 Type DestTy = Dest->getType(); | 1938 Type DestTy = Dest->getType(); |
| 1928 Variable *OneMask = makeVectorOfOnes(DestTy); | 1939 Variable *OneMask = makeVectorOfOnes(DestTy); |
| 1929 Variable *T = makeReg(DestTy); | 1940 Variable *T = makeReg(DestTy); |
| 1930 _movp(T, Src0RM); | 1941 _movp(T, Src0RM); |
| 1931 _pand(T, OneMask); | 1942 _pand(T, OneMask); |
| 1932 _movp(Dest, T); | 1943 _movp(Dest, T); |
| 1933 } else if (Dest->getType() == IceType_i64) { | 1944 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 1934 // t1=movzx src; dst.lo=t1; dst.hi=0 | 1945 // t1=movzx src; dst.lo=t1; dst.hi=0 |
| 1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1946 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1947 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1948 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1938 Variable *Tmp = makeReg(DestLo->getType()); | 1949 Variable *Tmp = makeReg(DestLo->getType()); |
| 1939 if (Src0RM->getType() == IceType_i32) { | 1950 if (Src0RM->getType() == IceType_i32) { |
| 1940 _mov(Tmp, Src0RM); | 1951 _mov(Tmp, Src0RM); |
| 1941 } else { | 1952 } else { |
| 1942 _movzx(Tmp, Src0RM); | 1953 _movzx(Tmp, Src0RM); |
| 1943 } | 1954 } |
| 1944 if (Src0RM->getType() == IceType_i1) { | 1955 if (Src0RM->getType() == IceType_i1) { |
| 1945 Constant *One = Ctx->getConstantInt32(1); | 1956 Constant *One = Ctx->getConstantInt32(1); |
| 1946 _and(Tmp, One); | 1957 _and(Tmp, One); |
| 1947 } | 1958 } |
| 1948 _mov(DestLo, Tmp); | 1959 _mov(DestLo, Tmp); |
| 1949 _mov(DestHi, Zero); | 1960 _mov(DestHi, Zero); |
| 1950 } else if (Src0RM->getType() == IceType_i1) { | 1961 } else if (Src0RM->getType() == IceType_i1) { |
| 1951 // t = Src0RM; t &= 1; Dest = t | 1962 // t = Src0RM; t &= 1; Dest = t |
| 1952 Constant *One = Ctx->getConstantInt32(1); | 1963 Constant *One = Ctx->getConstantInt32(1); |
| 1953 Type DestTy = Dest->getType(); | 1964 Type DestTy = Dest->getType(); |
| 1954 Variable *T; | 1965 Variable *T; |
| 1955 if (DestTy == IceType_i8) { | 1966 T = makeReg(IceType_i32); |
| 1956 T = makeReg(DestTy); | 1967 _mov(T, Src0RM); |
| 1957 _mov(T, Src0RM); | 1968 _and(T, One); |
| 1958 } else { | 1969 if (!Traits::Is64Bit) { |
| 1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. | 1970 assert(DestTy != IceType_i64); |
| 1960 T = makeReg(IceType_i32); | 1971 } else if (DestTy == IceType_i64) { |
| 1961 _movzx(T, Src0RM); | 1972 // In x86-64 we should be able to rely on mov reg, reg to zero extend T |
| 1973 // into Dest. At this point we can't ensure Dest will live in a | |
| 1974 // register. Therefore, we use _movzx, which the assembler rightly | |
| 1975 // converts to a 32-bit mov. A new temporary is created because the | |
| 1976 // assembler does not know how to movzx to a memory location. | |
| 1977 Variable *T_1 = makeReg(IceType_i64); | |
| 1978 _movzx(T_1, T); | |
| 1979 T = T_1; | |
| 1962 } | 1980 } |
| 1963 _and(T, One); | |
| 1964 _mov(Dest, T); | 1981 _mov(Dest, T); |
| 1965 } else { | 1982 } else { |
| 1966 // t1 = movzx src; dst = t1 | 1983 // t1 = movzx src; dst = t1 |
| 1967 Variable *T = makeReg(Dest->getType()); | 1984 Variable *T = makeReg(Dest->getType()); |
| 1968 _movzx(T, Src0RM); | 1985 _movzx(T, Src0RM); |
| 1969 _mov(Dest, T); | 1986 _mov(Dest, T); |
| 1970 } | 1987 } |
| 1971 break; | 1988 break; |
| 1972 } | 1989 } |
| 1973 case InstCast::Trunc: { | 1990 case InstCast::Trunc: { |
| 1974 if (isVectorType(Dest->getType())) { | 1991 if (isVectorType(Dest->getType())) { |
| 1975 // onemask = materialize(1,1,...); dst = src & onemask | 1992 // onemask = materialize(1,1,...); dst = src & onemask |
| 1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1993 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 1977 Type Src0Ty = Src0RM->getType(); | 1994 Type Src0Ty = Src0RM->getType(); |
| 1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); | 1995 Variable *OneMask = makeVectorOfOnes(Src0Ty); |
| 1979 Variable *T = makeReg(Dest->getType()); | 1996 Variable *T = makeReg(Dest->getType()); |
| 1980 _movp(T, Src0RM); | 1997 _movp(T, Src0RM); |
| 1981 _pand(T, OneMask); | 1998 _pand(T, OneMask); |
| 1982 _movp(Dest, T); | 1999 _movp(Dest, T); |
| 1983 } else { | 2000 } else { |
| 1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2001 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 1985 if (Src0->getType() == IceType_i64) | 2002 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) |
| 1986 Src0 = loOperand(Src0); | 2003 Src0 = loOperand(Src0); |
| 1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2004 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 1988 // t1 = trunc Src0RM; Dest = t1 | 2005 // t1 = trunc Src0RM; Dest = t1 |
| 1989 Variable *T = nullptr; | 2006 Variable *T = nullptr; |
| 1990 _mov(T, Src0RM); | 2007 _mov(T, Src0RM); |
| 1991 if (Dest->getType() == IceType_i1) | 2008 if (Dest->getType() == IceType_i1) |
| 1992 _and(T, Ctx->getConstantInt1(1)); | 2009 _and(T, Ctx->getConstantInt1(1)); |
| 1993 _mov(Dest, T); | 2010 _mov(Dest, T); |
| 1994 } | 2011 } |
| 1995 break; | 2012 break; |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 2006 case InstCast::Fptosi: | 2023 case InstCast::Fptosi: |
| 2007 if (isVectorType(Dest->getType())) { | 2024 if (isVectorType(Dest->getType())) { |
| 2008 assert(Dest->getType() == IceType_v4i32 && | 2025 assert(Dest->getType() == IceType_v4i32 && |
| 2009 Inst->getSrc(0)->getType() == IceType_v4f32); | 2026 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2027 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2028 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2012 Src0RM = legalizeToReg(Src0RM); | 2029 Src0RM = legalizeToReg(Src0RM); |
| 2013 Variable *T = makeReg(Dest->getType()); | 2030 Variable *T = makeReg(Dest->getType()); |
| 2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2031 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2015 _movp(Dest, T); | 2032 _movp(Dest, T); |
| 2016 } else if (Dest->getType() == IceType_i64) { | 2033 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2017 // Use a helper for converting floating-point values to 64-bit | 2034 // Use a helper for converting floating-point values to 64-bit |
| 2018 // integers. SSE2 appears to have no way to convert from xmm | 2035 // integers. SSE2 appears to have no way to convert from xmm |
| 2019 // registers to something like the edx:eax register pair, and | 2036 // registers to something like the edx:eax register pair, and |
| 2020 // gcc and clang both want to use x87 instructions complete with | 2037 // gcc and clang both want to use x87 instructions complete with |
| 2021 // temporary manipulation of the status word. This helper is | 2038 // temporary manipulation of the status word. This helper is |
| 2022 // not needed for x86-64. | 2039 // not needed for x86-64. |
| 2023 split64(Dest); | 2040 split64(Dest); |
| 2024 const SizeT MaxSrcs = 1; | 2041 const SizeT MaxSrcs = 1; |
| 2025 Type SrcType = Inst->getSrc(0)->getType(); | 2042 Type SrcType = Inst->getSrc(0)->getType(); |
| 2026 InstCall *Call = | 2043 InstCall *Call = |
| 2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2044 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 2028 : H_fptosi_f64_i64, | 2045 : H_fptosi_f64_i64, |
| 2029 Dest, MaxSrcs); | 2046 Dest, MaxSrcs); |
| 2030 Call->addArg(Inst->getSrc(0)); | 2047 Call->addArg(Inst->getSrc(0)); |
| 2031 lowerCall(Call); | 2048 lowerCall(Call); |
| 2032 } else { | 2049 } else { |
| 2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2050 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2051 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2035 Variable *T_1 = makeReg(IceType_i32); | 2052 Variable *T_1 = nullptr; |
| 2053 if (Traits::Is64Bit && Dest->getType() == IceType_i64) { | |
| 2054 T_1 = makeReg(IceType_i64); | |
| 2055 } else { | |
| 2056 assert(Dest->getType() != IceType_i64); | |
| 2057 T_1 = makeReg(IceType_i32); | |
| 2058 } | |
| 2059 // cvt() requires its integer argument to be a GPR. | |
| 2060 T_1->setWeightInfinite(); | |
| 2036 Variable *T_2 = makeReg(Dest->getType()); | 2061 Variable *T_2 = makeReg(Dest->getType()); |
| 2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2062 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2063 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2039 if (Dest->getType() == IceType_i1) | 2064 if (Dest->getType() == IceType_i1) |
| 2040 _and(T_2, Ctx->getConstantInt1(1)); | 2065 _and(T_2, Ctx->getConstantInt1(1)); |
| 2041 _mov(Dest, T_2); | 2066 _mov(Dest, T_2); |
| 2042 } | 2067 } |
| 2043 break; | 2068 break; |
| 2044 case InstCast::Fptoui: | 2069 case InstCast::Fptoui: |
| 2045 if (isVectorType(Dest->getType())) { | 2070 if (isVectorType(Dest->getType())) { |
| 2046 assert(Dest->getType() == IceType_v4i32 && | 2071 assert(Dest->getType() == IceType_v4i32 && |
| 2047 Inst->getSrc(0)->getType() == IceType_v4f32); | 2072 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2048 const SizeT MaxSrcs = 1; | 2073 const SizeT MaxSrcs = 1; |
| 2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2074 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
| 2050 Call->addArg(Inst->getSrc(0)); | 2075 Call->addArg(Inst->getSrc(0)); |
| 2051 lowerCall(Call); | 2076 lowerCall(Call); |
| 2052 } else if (Dest->getType() == IceType_i64 || | 2077 } else if (Dest->getType() == IceType_i64 || |
| 2053 Dest->getType() == IceType_i32) { | 2078 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
| 2054 // Use a helper for both x86-32 and x86-64. | 2079 // Use a helper for both x86-32 and x86-64. |
| 2055 split64(Dest); | 2080 if (!Traits::Is64Bit) |
| 2081 split64(Dest); | |
| 2056 const SizeT MaxSrcs = 1; | 2082 const SizeT MaxSrcs = 1; |
| 2057 Type DestType = Dest->getType(); | 2083 Type DestType = Dest->getType(); |
| 2058 Type SrcType = Inst->getSrc(0)->getType(); | 2084 Type SrcType = Inst->getSrc(0)->getType(); |
| 2059 IceString TargetString; | 2085 IceString TargetString; |
| 2060 if (isInt32Asserting32Or64(DestType)) { | 2086 if (Traits::Is64Bit) { |
| 2087 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 2088 : H_fptoui_f64_i64; | |
| 2089 } else if (isInt32Asserting32Or64(DestType)) { | |
| 2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2090 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
| 2062 : H_fptoui_f64_i32; | 2091 : H_fptoui_f64_i32; |
| 2063 } else { | 2092 } else { |
| 2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2093 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2065 : H_fptoui_f64_i64; | 2094 : H_fptoui_f64_i64; |
| 2066 } | 2095 } |
| 2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2096 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2068 Call->addArg(Inst->getSrc(0)); | 2097 Call->addArg(Inst->getSrc(0)); |
| 2069 lowerCall(Call); | 2098 lowerCall(Call); |
| 2070 return; | 2099 return; |
| 2071 } else { | 2100 } else { |
| 2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2102 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2074 Variable *T_1 = makeReg(IceType_i32); | 2103 assert(Dest->getType() != IceType_i64); |
| 2104 Variable *T_1 = nullptr; | |
| 2105 if (Traits::Is64Bit && Dest->getType() == IceType_i32) { | |
| 2106 T_1 = makeReg(IceType_i64); | |
| 2107 } else { | |
| 2108 assert(Dest->getType() != IceType_i32); | |
| 2109 T_1 = makeReg(IceType_i32); | |
| 2110 } | |
| 2111 T_1->setWeightInfinite(); | |
| 2075 Variable *T_2 = makeReg(Dest->getType()); | 2112 Variable *T_2 = makeReg(Dest->getType()); |
| 2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2113 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2114 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2078 if (Dest->getType() == IceType_i1) | 2115 if (Dest->getType() == IceType_i1) |
| 2079 _and(T_2, Ctx->getConstantInt1(1)); | 2116 _and(T_2, Ctx->getConstantInt1(1)); |
| 2080 _mov(Dest, T_2); | 2117 _mov(Dest, T_2); |
| 2081 } | 2118 } |
| 2082 break; | 2119 break; |
| 2083 case InstCast::Sitofp: | 2120 case InstCast::Sitofp: |
| 2084 if (isVectorType(Dest->getType())) { | 2121 if (isVectorType(Dest->getType())) { |
| 2085 assert(Dest->getType() == IceType_v4f32 && | 2122 assert(Dest->getType() == IceType_v4f32 && |
| 2086 Inst->getSrc(0)->getType() == IceType_v4i32); | 2123 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2124 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2125 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2089 Src0RM = legalizeToReg(Src0RM); | 2126 Src0RM = legalizeToReg(Src0RM); |
| 2090 Variable *T = makeReg(Dest->getType()); | 2127 Variable *T = makeReg(Dest->getType()); |
| 2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2128 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2092 _movp(Dest, T); | 2129 _movp(Dest, T); |
| 2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { | 2130 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2094 // Use a helper for x86-32. | 2131 // Use a helper for x86-32. |
| 2095 const SizeT MaxSrcs = 1; | 2132 const SizeT MaxSrcs = 1; |
| 2096 Type DestType = Dest->getType(); | 2133 Type DestType = Dest->getType(); |
| 2097 InstCall *Call = | 2134 InstCall *Call = |
| 2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2135 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 |
| 2099 : H_sitofp_i64_f64, | 2136 : H_sitofp_i64_f64, |
| 2100 Dest, MaxSrcs); | 2137 Dest, MaxSrcs); |
| 2101 // TODO: Call the correct compiler-rt helper function. | 2138 // TODO: Call the correct compiler-rt helper function. |
| 2102 Call->addArg(Inst->getSrc(0)); | 2139 Call->addArg(Inst->getSrc(0)); |
| 2103 lowerCall(Call); | 2140 lowerCall(Call); |
| 2104 return; | 2141 return; |
| 2105 } else { | 2142 } else { |
| 2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2143 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2107 // Sign-extend the operand. | 2144 // Sign-extend the operand. |
| 2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2145 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2109 Variable *T_1 = makeReg(IceType_i32); | 2146 Variable *T_1 = nullptr; |
| 2147 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { | |
| 2148 T_1 = makeReg(IceType_i64); | |
| 2149 } else { | |
| 2150 assert(Src0RM->getType() != IceType_i64); | |
| 2151 T_1 = makeReg(IceType_i32); | |
| 2152 } | |
| 2153 T_1->setWeightInfinite(); | |
| 2110 Variable *T_2 = makeReg(Dest->getType()); | 2154 Variable *T_2 = makeReg(Dest->getType()); |
| 2111 if (Src0RM->getType() == IceType_i32) | 2155 if (Src0RM->getType() == T_1->getType()) |
| 2112 _mov(T_1, Src0RM); | 2156 _mov(T_1, Src0RM); |
| 2113 else | 2157 else |
| 2114 _movsx(T_1, Src0RM); | 2158 _movsx(T_1, Src0RM); |
| 2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2159 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2116 _mov(Dest, T_2); | 2160 _mov(Dest, T_2); |
| 2117 } | 2161 } |
| 2118 break; | 2162 break; |
| 2119 case InstCast::Uitofp: { | 2163 case InstCast::Uitofp: { |
| 2120 Operand *Src0 = Inst->getSrc(0); | 2164 Operand *Src0 = Inst->getSrc(0); |
| 2121 if (isVectorType(Src0->getType())) { | 2165 if (isVectorType(Src0->getType())) { |
| 2122 assert(Dest->getType() == IceType_v4f32 && | 2166 assert(Dest->getType() == IceType_v4f32 && |
| 2123 Src0->getType() == IceType_v4i32); | 2167 Src0->getType() == IceType_v4i32); |
| 2124 const SizeT MaxSrcs = 1; | 2168 const SizeT MaxSrcs = 1; |
| 2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2169 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
| 2126 Call->addArg(Src0); | 2170 Call->addArg(Src0); |
| 2127 lowerCall(Call); | 2171 lowerCall(Call); |
| 2128 } else if (Src0->getType() == IceType_i64 || | 2172 } else if (Src0->getType() == IceType_i64 || |
| 2129 Src0->getType() == IceType_i32) { | 2173 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| 2130 // Use a helper for x86-32 and x86-64. Also use a helper for | 2174 // Use a helper for x86-32 and x86-64. Also use a helper for |
| 2131 // i32 on x86-32. | 2175 // i32 on x86-32. |
| 2132 const SizeT MaxSrcs = 1; | 2176 const SizeT MaxSrcs = 1; |
| 2133 Type DestType = Dest->getType(); | 2177 Type DestType = Dest->getType(); |
| 2134 IceString TargetString; | 2178 IceString TargetString; |
| 2135 if (isInt32Asserting32Or64(Src0->getType())) { | 2179 if (isInt32Asserting32Or64(Src0->getType())) { |
| 2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2180 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
| 2137 : H_uitofp_i32_f64; | 2181 : H_uitofp_i32_f64; |
| 2138 } else { | 2182 } else { |
| 2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2183 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
| 2140 : H_uitofp_i64_f64; | 2184 : H_uitofp_i64_f64; |
| 2141 } | 2185 } |
| 2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2186 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2143 Call->addArg(Src0); | 2187 Call->addArg(Src0); |
| 2144 lowerCall(Call); | 2188 lowerCall(Call); |
| 2145 return; | 2189 return; |
| 2146 } else { | 2190 } else { |
| 2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2191 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2148 // Zero-extend the operand. | 2192 // Zero-extend the operand. |
| 2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2193 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2150 Variable *T_1 = makeReg(IceType_i32); | 2194 Variable *T_1 = nullptr; |
| 2195 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { | |
| 2196 T_1 = makeReg(IceType_i64); | |
| 2197 } else { | |
| 2198 assert(Src0RM->getType() != IceType_i64); | |
| 2199 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); | |
| 2200 T_1 = makeReg(IceType_i32); | |
| 2201 } | |
| 2202 T_1->setWeightInfinite(); | |
| 2151 Variable *T_2 = makeReg(Dest->getType()); | 2203 Variable *T_2 = makeReg(Dest->getType()); |
| 2152 if (Src0RM->getType() == IceType_i32) | 2204 if (Src0RM->getType() == T_1->getType()) |
| 2153 _mov(T_1, Src0RM); | 2205 _mov(T_1, Src0RM); |
| 2154 else | 2206 else |
| 2155 _movzx(T_1, Src0RM); | 2207 _movzx(T_1, Src0RM); |
| 2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2208 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2157 _mov(Dest, T_2); | 2209 _mov(Dest, T_2); |
| 2158 } | 2210 } |
| 2159 break; | 2211 break; |
| 2160 } | 2212 } |
| 2161 case InstCast::Bitcast: { | 2213 case InstCast::Bitcast: { |
| 2162 Operand *Src0 = Inst->getSrc(0); | 2214 Operand *Src0 = Inst->getSrc(0); |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2198 typename Traits::SpillVariable *SpillVar = | 2250 typename Traits::SpillVariable *SpillVar = |
| 2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); | 2251 Func->makeVariable<typename Traits::SpillVariable>(SrcType); |
| 2200 SpillVar->setLinkedTo(Dest); | 2252 SpillVar->setLinkedTo(Dest); |
| 2201 Variable *Spill = SpillVar; | 2253 Variable *Spill = SpillVar; |
| 2202 Spill->setWeight(RegWeight::Zero); | 2254 Spill->setWeight(RegWeight::Zero); |
| 2203 _mov(T, Src0RM); | 2255 _mov(T, Src0RM); |
| 2204 _mov(Spill, T); | 2256 _mov(Spill, T); |
| 2205 _mov(Dest, Spill); | 2257 _mov(Dest, Spill); |
| 2206 } break; | 2258 } break; |
| 2207 case IceType_i64: { | 2259 case IceType_i64: { |
| 2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2260 assert(Src0->getType() == IceType_f64); |
| 2209 assert(Src0RM->getType() == IceType_f64); | 2261 if (Traits::Is64Bit) { |
| 2210 // a.i64 = bitcast b.f64 ==> | 2262 // Movd requires its fp argument (in this case, the bitcast source) to |
| 2211 // s.f64 = spill b.f64 | 2263 // be an xmm register. |
| 2212 // t_lo.i32 = lo(s.f64) | 2264 Variable *Src0R = legalizeToReg(Src0); |
| 2213 // a_lo.i32 = t_lo.i32 | 2265 Variable *T = makeReg(IceType_i64); |
| 2214 // t_hi.i32 = hi(s.f64) | 2266 _movd(T, Src0R); |
| 2215 // a_hi.i32 = t_hi.i32 | 2267 _mov(Dest, T); |
| 2216 Operand *SpillLo, *SpillHi; | 2268 } else { |
| 2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | 2269 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2270 // a.i64 = bitcast b.f64 ==> | |
| 2271 // s.f64 = spill b.f64 | |
| 2272 // t_lo.i32 = lo(s.f64) | |
| 2273 // a_lo.i32 = t_lo.i32 | |
| 2274 // t_hi.i32 = hi(s.f64) | |
| 2275 // a_hi.i32 = t_hi.i32 | |
| 2276 Operand *SpillLo, *SpillHi; | |
| 2277 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | |
| 2278 typename Traits::SpillVariable *SpillVar = | |
| 2279 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | |
| 2280 SpillVar->setLinkedTo(Src0Var); | |
| 2281 Variable *Spill = SpillVar; | |
| 2282 Spill->setWeight(RegWeight::Zero); | |
| 2283 _movq(Spill, Src0RM); | |
| 2284 SpillLo = Traits::VariableSplit::create(Func, Spill, | |
| 2285 Traits::VariableSplit::Low); | |
| 2286 SpillHi = Traits::VariableSplit::create(Func, Spill, | |
| 2287 Traits::VariableSplit::High); | |
| 2288 } else { | |
| 2289 SpillLo = loOperand(Src0RM); | |
| 2290 SpillHi = hiOperand(Src0RM); | |
| 2291 } | |
| 2292 | |
| 2293 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2294 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2295 Variable *T_Lo = makeReg(IceType_i32); | |
| 2296 Variable *T_Hi = makeReg(IceType_i32); | |
| 2297 | |
| 2298 _mov(T_Lo, SpillLo); | |
| 2299 _mov(DestLo, T_Lo); | |
| 2300 _mov(T_Hi, SpillHi); | |
| 2301 _mov(DestHi, T_Hi); | |
| 2302 } | |
| 2303 } break; | |
| 2304 case IceType_f64: { | |
| 2305 assert(Src0->getType() == IceType_i64); | |
| 2306 if (Traits::Is64Bit) { | |
| 2307 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2308 Variable *T = makeReg(IceType_f64); | |
| 2309 // Movd requires its fp argument (in this case, the bitcast destination) | |
| 2310 // to be an xmm register. | |
| 2311 T->setWeightInfinite(); | |
| 2312 _movd(T, Src0RM); | |
| 2313 _mov(Dest, T); | |
| 2314 } else { | |
| 2315 Src0 = legalize(Src0); | |
| 2316 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | |
| 2317 Variable *T = Func->makeVariable(Dest->getType()); | |
| 2318 _movq(T, Src0); | |
| 2319 _movq(Dest, T); | |
| 2320 break; | |
| 2321 } | |
| 2322 // a.f64 = bitcast b.i64 ==> | |
| 2323 // t_lo.i32 = b_lo.i32 | |
| 2324 // FakeDef(s.f64) | |
| 2325 // lo(s.f64) = t_lo.i32 | |
| 2326 // t_hi.i32 = b_hi.i32 | |
| 2327 // hi(s.f64) = t_hi.i32 | |
| 2328 // a.f64 = s.f64 | |
| 2218 typename Traits::SpillVariable *SpillVar = | 2329 typename Traits::SpillVariable *SpillVar = |
| 2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | 2330 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); |
| 2220 SpillVar->setLinkedTo(Src0Var); | 2331 SpillVar->setLinkedTo(Dest); |
| 2221 Variable *Spill = SpillVar; | 2332 Variable *Spill = SpillVar; |
| 2222 Spill->setWeight(RegWeight::Zero); | 2333 Spill->setWeight(RegWeight::Zero); |
| 2223 _movq(Spill, Src0RM); | 2334 |
| 2224 SpillLo = Traits::VariableSplit::create(Func, Spill, | 2335 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 2225 Traits::VariableSplit::Low); | 2336 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( |
| 2226 SpillHi = Traits::VariableSplit::create(Func, Spill, | 2337 Func, Spill, Traits::VariableSplit::Low); |
| 2227 Traits::VariableSplit::High); | 2338 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( |
| 2228 } else { | 2339 Func, Spill, Traits::VariableSplit::High); |
| 2229 SpillLo = loOperand(Src0RM); | 2340 _mov(T_Lo, loOperand(Src0)); |
| 2230 SpillHi = hiOperand(Src0RM); | 2341 // Technically, the Spill is defined after the _store happens, but |
| 2342 // SpillLo is considered a "use" of Spill so define Spill before it | |
| 2343 // is used. | |
| 2344 Context.insert(InstFakeDef::create(Func, Spill)); | |
| 2345 _store(T_Lo, SpillLo); | |
| 2346 _mov(T_Hi, hiOperand(Src0)); | |
| 2347 _store(T_Hi, SpillHi); | |
| 2348 _movq(Dest, Spill); | |
| 2231 } | 2349 } |
| 2232 | |
| 2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2235 Variable *T_Lo = makeReg(IceType_i32); | |
| 2236 Variable *T_Hi = makeReg(IceType_i32); | |
| 2237 | |
| 2238 _mov(T_Lo, SpillLo); | |
| 2239 _mov(DestLo, T_Lo); | |
| 2240 _mov(T_Hi, SpillHi); | |
| 2241 _mov(DestHi, T_Hi); | |
| 2242 } break; | |
| 2243 case IceType_f64: { | |
| 2244 Src0 = legalize(Src0); | |
| 2245 assert(Src0->getType() == IceType_i64); | |
| 2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | |
| 2247 Variable *T = Func->makeVariable(Dest->getType()); | |
| 2248 _movq(T, Src0); | |
| 2249 _movq(Dest, T); | |
| 2250 break; | |
| 2251 } | |
| 2252 // a.f64 = bitcast b.i64 ==> | |
| 2253 // t_lo.i32 = b_lo.i32 | |
| 2254 // FakeDef(s.f64) | |
| 2255 // lo(s.f64) = t_lo.i32 | |
| 2256 // t_hi.i32 = b_hi.i32 | |
| 2257 // hi(s.f64) = t_hi.i32 | |
| 2258 // a.f64 = s.f64 | |
| 2259 typename Traits::SpillVariable *SpillVar = | |
| 2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); | |
| 2261 SpillVar->setLinkedTo(Dest); | |
| 2262 Variable *Spill = SpillVar; | |
| 2263 Spill->setWeight(RegWeight::Zero); | |
| 2264 | |
| 2265 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( | |
| 2267 Func, Spill, Traits::VariableSplit::Low); | |
| 2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( | |
| 2269 Func, Spill, Traits::VariableSplit::High); | |
| 2270 _mov(T_Lo, loOperand(Src0)); | |
| 2271 // Technically, the Spill is defined after the _store happens, but | |
| 2272 // SpillLo is considered a "use" of Spill so define Spill before it | |
| 2273 // is used. | |
| 2274 Context.insert(InstFakeDef::create(Func, Spill)); | |
| 2275 _store(T_Lo, SpillLo); | |
| 2276 _mov(T_Hi, hiOperand(Src0)); | |
| 2277 _store(T_Hi, SpillHi); | |
| 2278 _movq(Dest, Spill); | |
| 2279 } break; | 2350 } break; |
| 2280 case IceType_v8i1: { | 2351 case IceType_v8i1: { |
| 2281 assert(Src0->getType() == IceType_i8); | 2352 assert(Src0->getType() == IceType_i8); |
| 2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); | 2353 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); |
| 2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | 2354 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); |
| 2284 // Arguments to functions are required to be at least 32 bits wide. | 2355 // Arguments to functions are required to be at least 32 bits wide. |
| 2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | 2356 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); |
| 2286 Call->addArg(Src0AsI32); | 2357 Call->addArg(Src0AsI32); |
| 2287 lowerCall(Call); | 2358 lowerCall(Call); |
| 2288 } break; | 2359 } break; |
| (...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2679 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2609 _pxor(T, MinusOne); | 2680 _pxor(T, MinusOne); |
| 2610 } break; | 2681 } break; |
| 2611 } | 2682 } |
| 2612 | 2683 |
| 2613 _movp(Dest, T); | 2684 _movp(Dest, T); |
| 2614 eliminateNextVectorSextInstruction(Dest); | 2685 eliminateNextVectorSextInstruction(Dest); |
| 2615 return; | 2686 return; |
| 2616 } | 2687 } |
| 2617 | 2688 |
| 2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2689 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 2619 if (Src0->getType() == IceType_i64) { | 2690 lowerIcmp64(Inst); |
| 2620 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2621 size_t Index = static_cast<size_t>(Condition); | |
| 2622 assert(Index < Traits::TableIcmp64Size); | |
| 2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2627 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2628 Constant *One = Ctx->getConstantInt32(1); | |
| 2629 typename Traits::Insts::Label *LabelFalse = | |
| 2630 Traits::Insts::Label::create(Func, this); | |
| 2631 typename Traits::Insts::Label *LabelTrue = | |
| 2632 Traits::Insts::Label::create(Func, this); | |
| 2633 _mov(Dest, One); | |
| 2634 _cmp(Src0HiRM, Src1HiRI); | |
| 2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | |
| 2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | |
| 2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | |
| 2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | |
| 2639 _cmp(Src0LoRM, Src1LoRI); | |
| 2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | |
| 2641 Context.insert(LabelFalse); | |
| 2642 _mov_nonkillable(Dest, Zero); | |
| 2643 Context.insert(LabelTrue); | |
| 2644 return; | 2691 return; |
| 2645 } | 2692 } |
| 2646 | 2693 |
| 2647 // cmp b, c | 2694 // cmp b, c |
| 2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 2695 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
| 2649 _cmp(Src0RM, Src1); | 2696 _cmp(Src0RM, Src1); |
| 2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); | 2697 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); |
| 2651 } | 2698 } |
| 2652 | 2699 |
| 2700 template <typename Machine> | |
| 2701 template <typename T> | |
| 2702 typename std::enable_if<!T::Is64Bit, void>::type | |
| 2703 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) { | |
| 2704 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | |
| 2705 Operand *Src0 = legalize(Inst->getSrc(0)); | |
| 2706 Operand *Src1 = legalize(Inst->getSrc(1)); | |
| 2707 Variable *Dest = Inst->getDest(); | |
| 2708 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2709 size_t Index = static_cast<size_t>(Condition); | |
| 2710 assert(Index < Traits::TableIcmp64Size); | |
| 2711 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2712 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | |
| 2713 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2714 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
| 2715 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2716 Constant *One = Ctx->getConstantInt32(1); | |
| 2717 typename Traits::Insts::Label *LabelFalse = | |
| 2718 Traits::Insts::Label::create(Func, this); | |
| 2719 typename Traits::Insts::Label *LabelTrue = | |
| 2720 Traits::Insts::Label::create(Func, this); | |
| 2721 _mov(Dest, One); | |
| 2722 _cmp(Src0HiRM, Src1HiRI); | |
| 2723 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) | |
| 2724 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | |
| 2725 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) | |
| 2726 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | |
| 2727 _cmp(Src0LoRM, Src1LoRI); | |
| 2728 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | |
| 2729 Context.insert(LabelFalse); | |
| 2730 _mov_nonkillable(Dest, Zero); | |
| 2731 Context.insert(LabelTrue); | |
| 2732 } | |
| 2733 | |
| 2653 template <class Machine> | 2734 template <class Machine> |
| 2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 2735 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| 2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 2736 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
| 2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 2737 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
| 2657 ConstantInteger32 *ElementIndex = | 2738 ConstantInteger32 *ElementIndex = |
| 2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 2739 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
| 2659 // Only constant indices are allowed in PNaCl IR. | 2740 // Only constant indices are allowed in PNaCl IR. |
| 2660 assert(ElementIndex); | 2741 assert(ElementIndex); |
| 2661 unsigned Index = ElementIndex->getValue(); | 2742 unsigned Index = ElementIndex->getValue(); |
| 2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 2743 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2841 } | 2922 } |
| 2842 case Intrinsics::AtomicLoad: { | 2923 case Intrinsics::AtomicLoad: { |
| 2843 // We require the memory address to be naturally aligned. | 2924 // We require the memory address to be naturally aligned. |
| 2844 // Given that is the case, then normal loads are atomic. | 2925 // Given that is the case, then normal loads are atomic. |
| 2845 if (!Intrinsics::isMemoryOrderValid( | 2926 if (!Intrinsics::isMemoryOrderValid( |
| 2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 2927 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| 2847 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2928 Func->setError("Unexpected memory ordering for AtomicLoad"); |
| 2848 return; | 2929 return; |
| 2849 } | 2930 } |
| 2850 Variable *Dest = Instr->getDest(); | 2931 Variable *Dest = Instr->getDest(); |
| 2851 if (Dest->getType() == IceType_i64) { | 2932 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2852 // Follow what GCC does and use a movq instead of what lowerLoad() | 2933 // Follow what GCC does and use a movq instead of what lowerLoad() |
| 2853 // normally does (split the load into two). | 2934 // normally does (split the load into two). |
| 2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | 2935 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding |
| 2855 // can't happen anyway, since this is x86-32 and integer arithmetic only | 2936 // can't happen anyway, since this is x86-32 and integer arithmetic only |
| 2856 // happens on 32-bit quantities. | 2937 // happens on 32-bit quantities. |
| 2857 Variable *T = makeReg(IceType_f64); | 2938 Variable *T = makeReg(IceType_f64); |
| 2858 typename Traits::X86OperandMem *Addr = | 2939 typename Traits::X86OperandMem *Addr = |
| 2859 formMemoryOperand(Instr->getArg(0), IceType_f64); | 2940 formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 2860 _movq(T, Addr); | 2941 _movq(T, Addr); |
| 2861 // Then cast the bits back out of the XMM register to the i64 Dest. | 2942 // Then cast the bits back out of the XMM register to the i64 Dest. |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 2891 if (!Intrinsics::isMemoryOrderValid( | 2972 if (!Intrinsics::isMemoryOrderValid( |
| 2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 2973 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 2893 Func->setError("Unexpected memory ordering for AtomicStore"); | 2974 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 2894 return; | 2975 return; |
| 2895 } | 2976 } |
| 2896 // We require the memory address to be naturally aligned. | 2977 // We require the memory address to be naturally aligned. |
| 2897 // Given that is the case, then normal stores are atomic. | 2978 // Given that is the case, then normal stores are atomic. |
| 2898 // Add a fence after the store to make it visible. | 2979 // Add a fence after the store to make it visible. |
| 2899 Operand *Value = Instr->getArg(0); | 2980 Operand *Value = Instr->getArg(0); |
| 2900 Operand *Ptr = Instr->getArg(1); | 2981 Operand *Ptr = Instr->getArg(1); |
| 2901 if (Value->getType() == IceType_i64) { | 2982 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
| 2902 // Use a movq instead of what lowerStore() normally does | 2983 // Use a movq instead of what lowerStore() normally does |
| 2903 // (split the store into two), following what GCC does. | 2984 // (split the store into two), following what GCC does. |
| 2904 // Cast the bits from int -> to an xmm register first. | 2985 // Cast the bits from int -> to an xmm register first. |
| 2905 Variable *T = makeReg(IceType_f64); | 2986 Variable *T = makeReg(IceType_f64); |
| 2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
| 2907 lowerCast(Cast); | 2988 lowerCast(Cast); |
| 2908 // Then store XMM w/ a movq. | 2989 // Then store XMM w/ a movq. |
| 2909 typename Traits::X86OperandMem *Addr = | 2990 typename Traits::X86OperandMem *Addr = |
| 2910 formMemoryOperand(Ptr, IceType_f64); | 2991 formMemoryOperand(Ptr, IceType_f64); |
| 2911 _storeq(T, Addr); | 2992 _storeq(T, Addr); |
| 2912 _mfence(); | 2993 _mfence(); |
| 2913 return; | 2994 return; |
| 2914 } | 2995 } |
| 2915 InstStore *Store = InstStore::create(Func, Value, Ptr); | 2996 InstStore *Store = InstStore::create(Func, Value, Ptr); |
| 2916 lowerStore(Store); | 2997 lowerStore(Store); |
| 2917 _mfence(); | 2998 _mfence(); |
| 2918 return; | 2999 return; |
| 2919 } | 3000 } |
| 2920 case Intrinsics::Bswap: { | 3001 case Intrinsics::Bswap: { |
| 2921 Variable *Dest = Instr->getDest(); | 3002 Variable *Dest = Instr->getDest(); |
| 2922 Operand *Val = Instr->getArg(0); | 3003 Operand *Val = Instr->getArg(0); |
| 2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the | 3004 // In 32-bit mode, bswap only works on 32-bit arguments, and the |
| 2924 // argument must be a register. Use rotate left for 16-bit bswap. | 3005 // argument must be a register. Use rotate left for 16-bit bswap. |
| 2925 if (Val->getType() == IceType_i64) { | 3006 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2926 Val = legalizeUndef(Val); | 3007 Val = legalizeUndef(Val); |
| 2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); | 3008 Variable *T_Lo = legalizeToReg(loOperand(Val)); |
| 2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); | 3009 Variable *T_Hi = legalizeToReg(hiOperand(Val)); |
| 2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2931 _bswap(T_Lo); | 3012 _bswap(T_Lo); |
| 2932 _bswap(T_Hi); | 3013 _bswap(T_Hi); |
| 2933 _mov(DestLo, T_Hi); | 3014 _mov(DestLo, T_Hi); |
| 2934 _mov(DestHi, T_Lo); | 3015 _mov(DestHi, T_Lo); |
| 2935 } else if (Val->getType() == IceType_i32) { | 3016 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) || |
| 3017 Val->getType() == IceType_i32) { | |
| 2936 Variable *T = legalizeToReg(Val); | 3018 Variable *T = legalizeToReg(Val); |
| 2937 _bswap(T); | 3019 _bswap(T); |
| 2938 _mov(Dest, T); | 3020 _mov(Dest, T); |
| 2939 } else { | 3021 } else { |
| 2940 assert(Val->getType() == IceType_i16); | 3022 assert(Val->getType() == IceType_i16); |
| 2941 Constant *Eight = Ctx->getConstantInt16(8); | 3023 Constant *Eight = Ctx->getConstantInt16(8); |
| 2942 Variable *T = nullptr; | 3024 Variable *T = nullptr; |
| 2943 Val = legalize(Val); | 3025 Val = legalize(Val); |
| 2944 _mov(T, Val); | 3026 _mov(T, Val); |
| 2945 _rol(T, Eight); | 3027 _rol(T, Eight); |
| 2946 _mov(Dest, T); | 3028 _mov(Dest, T); |
| 2947 } | 3029 } |
| 2948 return; | 3030 return; |
| 2949 } | 3031 } |
| 2950 case Intrinsics::Ctpop: { | 3032 case Intrinsics::Ctpop: { |
| 2951 Variable *Dest = Instr->getDest(); | 3033 Variable *Dest = Instr->getDest(); |
| 3034 Variable *T = nullptr; | |
| 2952 Operand *Val = Instr->getArg(0); | 3035 Operand *Val = Instr->getArg(0); |
| 2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 3036 Type ValTy = Val->getType(); |
| 2954 ? H_call_ctpop_i32 | 3037 assert(ValTy == IceType_i32 || ValTy == IceType_i64); |
| 2955 : H_call_ctpop_i64, | 3038 |
| 2956 Dest, 1); | 3039 if (!Traits::Is64Bit) { |
| 3040 T = Dest; | |
| 3041 } else { | |
| 3042 T = makeReg(IceType_i64); | |
| 3043 if (ValTy == IceType_i32) { | |
| 3044 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by | |
| 3045 // converting it to a 64-bit value, and using ctpop_i64. _movzx should | |
| 3046 // ensure we will not have any bits set on Val's upper 32 bits. | |
| 3047 Variable *V = makeReg(IceType_i64); | |
| 3048 _movzx(V, Val); | |
| 3049 Val = V; | |
| 3050 } | |
| 3051 ValTy = IceType_i64; | |
| 3052 } | |
| 3053 | |
| 3054 InstCall *Call = makeHelperCall( | |
| 3055 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1); | |
| 2957 Call->addArg(Val); | 3056 Call->addArg(Val); |
| 2958 lowerCall(Call); | 3057 lowerCall(Call); |
| 2959 // The popcount helpers always return 32-bit values, while the intrinsic's | 3058 // The popcount helpers always return 32-bit values, while the intrinsic's |
| 2960 // signature matches the native POPCNT instruction and fills a 64-bit reg | 3059 // signature matches the native POPCNT instruction and fills a 64-bit reg |
| 2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case | 3060 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case |
| 2962 // the user doesn't do that in the IR. If the user does that in the IR, | 3061 // the user doesn't do that in the IR. If the user does that in the IR, |
| 2963 // then this zero'ing instruction is dead and gets optimized out. | 3062 // then this zero'ing instruction is dead and gets optimized out. |
| 2964 if (Val->getType() == IceType_i64) { | 3063 if (!Traits::Is64Bit) { |
| 2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3064 assert(T == Dest); |
| 2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3065 if (Val->getType() == IceType_i64) { |
| 2967 _mov(DestHi, Zero); | 3066 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3067 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 3068 _mov(DestHi, Zero); | |
| 3069 } | |
| 3070 } else { | |
| 3071 assert(Val->getType() == IceType_i64); | |
| 3072 // T is 64 bit. It needs to be copied to dest. We need to: | |
| 3073 // | |
| 3074 // T_1.32 = trunc T.64 to i32 | |
| 3075 // T_2.64 = zext T_1.32 to i64 | |
| 3076 // Dest.<<right_size>> = T_2.<<right_size>> | |
| 3077 // | |
| 3078 // which ensures the upper 32 bits will always be cleared. Just doing a | |
| 3079 // | |
| 3080 // mov Dest.32 = trunc T.32 to i32 | |
| 3081 // | |
| 3082 // is dangerous because there's a chance the compiler will optimize this | |
| 3083 // copy out. To use _movzx we need two new registers (one 32-, and | |
| 3084 // another 64-bit wide.) | |
| 3085 Variable *T_1 = makeReg(IceType_i32); | |
| 3086 _mov(T_1, T); | |
| 3087 Variable *T_2 = makeReg(IceType_i64); | |
| 3088 _movzx(T_2, T_1); | |
| 3089 _mov(Dest, T_2); | |
| 2968 } | 3090 } |
| 2969 return; | 3091 return; |
| 2970 } | 3092 } |
| 2971 case Intrinsics::Ctlz: { | 3093 case Intrinsics::Ctlz: { |
| 2972 // The "is zero undef" parameter is ignored and we always return | 3094 // The "is zero undef" parameter is ignored and we always return |
| 2973 // a well-defined value. | 3095 // a well-defined value. |
| 2974 Operand *Val = legalize(Instr->getArg(0)); | 3096 Operand *Val = legalize(Instr->getArg(0)); |
| 2975 Operand *FirstVal; | 3097 Operand *FirstVal; |
| 2976 Operand *SecondVal = nullptr; | 3098 Operand *SecondVal = nullptr; |
| 2977 if (Val->getType() == IceType_i64) { | 3099 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2978 FirstVal = loOperand(Val); | 3100 FirstVal = loOperand(Val); |
| 2979 SecondVal = hiOperand(Val); | 3101 SecondVal = hiOperand(Val); |
| 2980 } else { | 3102 } else { |
| 2981 FirstVal = Val; | 3103 FirstVal = Val; |
| 2982 } | 3104 } |
| 2983 const bool IsCttz = false; | 3105 const bool IsCttz = false; |
| 2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3106 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 2985 SecondVal); | 3107 SecondVal); |
| 2986 return; | 3108 return; |
| 2987 } | 3109 } |
| 2988 case Intrinsics::Cttz: { | 3110 case Intrinsics::Cttz: { |
| 2989 // The "is zero undef" parameter is ignored and we always return | 3111 // The "is zero undef" parameter is ignored and we always return |
| 2990 // a well-defined value. | 3112 // a well-defined value. |
| 2991 Operand *Val = legalize(Instr->getArg(0)); | 3113 Operand *Val = legalize(Instr->getArg(0)); |
| 2992 Operand *FirstVal; | 3114 Operand *FirstVal; |
| 2993 Operand *SecondVal = nullptr; | 3115 Operand *SecondVal = nullptr; |
| 2994 if (Val->getType() == IceType_i64) { | 3116 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| 2995 FirstVal = hiOperand(Val); | 3117 FirstVal = hiOperand(Val); |
| 2996 SecondVal = loOperand(Val); | 3118 SecondVal = loOperand(Val); |
| 2997 } else { | 3119 } else { |
| 2998 FirstVal = Val; | 3120 FirstVal = Val; |
| 2999 } | 3121 } |
| 3000 const bool IsCttz = true; | 3122 const bool IsCttz = true; |
| 3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3123 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
| 3002 SecondVal); | 3124 SecondVal); |
| 3003 return; | 3125 return; |
| 3004 } | 3126 } |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3098 Func->setError("Should not be lowering UnknownIntrinsic"); | 3220 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3099 return; | 3221 return; |
| 3100 } | 3222 } |
| 3101 return; | 3223 return; |
| 3102 } | 3224 } |
| 3103 | 3225 |
| 3104 template <class Machine> | 3226 template <class Machine> |
| 3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3227 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3106 Operand *Ptr, Operand *Expected, | 3228 Operand *Ptr, Operand *Expected, |
| 3107 Operand *Desired) { | 3229 Operand *Desired) { |
| 3108 if (Expected->getType() == IceType_i64) { | 3230 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { |
| 3109 // Reserve the pre-colored registers first, before adding any more | 3231 // Reserve the pre-colored registers first, before adding any more |
| 3110 // infinite-weight variables from formMemoryOperand's legalization. | 3232 // infinite-weight variables from formMemoryOperand's legalization. |
| 3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3233 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3234 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3235 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3236 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3115 _mov(T_eax, loOperand(Expected)); | 3237 _mov(T_eax, loOperand(Expected)); |
| 3116 _mov(T_edx, hiOperand(Expected)); | 3238 _mov(T_edx, hiOperand(Expected)); |
| 3117 _mov(T_ebx, loOperand(Desired)); | 3239 _mov(T_ebx, loOperand(Desired)); |
| 3118 _mov(T_ecx, hiOperand(Desired)); | 3240 _mov(T_ecx, hiOperand(Desired)); |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3338 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 3217 Operand *Ptr, Operand *Val) { | 3339 Operand *Ptr, Operand *Val) { |
| 3218 bool NeedsCmpxchg = false; | 3340 bool NeedsCmpxchg = false; |
| 3219 LowerBinOp Op_Lo = nullptr; | 3341 LowerBinOp Op_Lo = nullptr; |
| 3220 LowerBinOp Op_Hi = nullptr; | 3342 LowerBinOp Op_Hi = nullptr; |
| 3221 switch (Operation) { | 3343 switch (Operation) { |
| 3222 default: | 3344 default: |
| 3223 Func->setError("Unknown AtomicRMW operation"); | 3345 Func->setError("Unknown AtomicRMW operation"); |
| 3224 return; | 3346 return; |
| 3225 case Intrinsics::AtomicAdd: { | 3347 case Intrinsics::AtomicAdd: { |
| 3226 if (Dest->getType() == IceType_i64) { | 3348 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3227 // All the fall-through paths must set this to true, but use this | 3349 // All the fall-through paths must set this to true, but use this |
| 3228 // for asserting. | 3350 // for asserting. |
| 3229 NeedsCmpxchg = true; | 3351 NeedsCmpxchg = true; |
| 3230 Op_Lo = &TargetX86Base<Machine>::_add; | 3352 Op_Lo = &TargetX86Base<Machine>::_add; |
| 3231 Op_Hi = &TargetX86Base<Machine>::_adc; | 3353 Op_Hi = &TargetX86Base<Machine>::_adc; |
| 3232 break; | 3354 break; |
| 3233 } | 3355 } |
| 3234 typename Traits::X86OperandMem *Addr = | 3356 typename Traits::X86OperandMem *Addr = |
| 3235 formMemoryOperand(Ptr, Dest->getType()); | 3357 formMemoryOperand(Ptr, Dest->getType()); |
| 3236 const bool Locked = true; | 3358 const bool Locked = true; |
| 3237 Variable *T = nullptr; | 3359 Variable *T = nullptr; |
| 3238 _mov(T, Val); | 3360 _mov(T, Val); |
| 3239 _xadd(Addr, T, Locked); | 3361 _xadd(Addr, T, Locked); |
| 3240 _mov(Dest, T); | 3362 _mov(Dest, T); |
| 3241 return; | 3363 return; |
| 3242 } | 3364 } |
| 3243 case Intrinsics::AtomicSub: { | 3365 case Intrinsics::AtomicSub: { |
| 3244 if (Dest->getType() == IceType_i64) { | 3366 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3245 NeedsCmpxchg = true; | 3367 NeedsCmpxchg = true; |
| 3246 Op_Lo = &TargetX86Base<Machine>::_sub; | 3368 Op_Lo = &TargetX86Base<Machine>::_sub; |
| 3247 Op_Hi = &TargetX86Base<Machine>::_sbb; | 3369 Op_Hi = &TargetX86Base<Machine>::_sbb; |
| 3248 break; | 3370 break; |
| 3249 } | 3371 } |
| 3250 typename Traits::X86OperandMem *Addr = | 3372 typename Traits::X86OperandMem *Addr = |
| 3251 formMemoryOperand(Ptr, Dest->getType()); | 3373 formMemoryOperand(Ptr, Dest->getType()); |
| 3252 const bool Locked = true; | 3374 const bool Locked = true; |
| 3253 Variable *T = nullptr; | 3375 Variable *T = nullptr; |
| 3254 _mov(T, Val); | 3376 _mov(T, Val); |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 3271 NeedsCmpxchg = true; | 3393 NeedsCmpxchg = true; |
| 3272 Op_Lo = &TargetX86Base<Machine>::_and; | 3394 Op_Lo = &TargetX86Base<Machine>::_and; |
| 3273 Op_Hi = &TargetX86Base<Machine>::_and; | 3395 Op_Hi = &TargetX86Base<Machine>::_and; |
| 3274 break; | 3396 break; |
| 3275 case Intrinsics::AtomicXor: | 3397 case Intrinsics::AtomicXor: |
| 3276 NeedsCmpxchg = true; | 3398 NeedsCmpxchg = true; |
| 3277 Op_Lo = &TargetX86Base<Machine>::_xor; | 3399 Op_Lo = &TargetX86Base<Machine>::_xor; |
| 3278 Op_Hi = &TargetX86Base<Machine>::_xor; | 3400 Op_Hi = &TargetX86Base<Machine>::_xor; |
| 3279 break; | 3401 break; |
| 3280 case Intrinsics::AtomicExchange: | 3402 case Intrinsics::AtomicExchange: |
| 3281 if (Dest->getType() == IceType_i64) { | 3403 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 3282 NeedsCmpxchg = true; | 3404 NeedsCmpxchg = true; |
| 3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | 3405 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 3284 // just need to be moved to the ecx and ebx registers. | 3406 // just need to be moved to the ecx and ebx registers. |
| 3285 Op_Lo = nullptr; | 3407 Op_Lo = nullptr; |
| 3286 Op_Hi = nullptr; | 3408 Op_Hi = nullptr; |
| 3287 break; | 3409 break; |
| 3288 } | 3410 } |
| 3289 typename Traits::X86OperandMem *Addr = | 3411 typename Traits::X86OperandMem *Addr = |
| 3290 formMemoryOperand(Ptr, Dest->getType()); | 3412 formMemoryOperand(Ptr, Dest->getType()); |
| 3291 Variable *T = nullptr; | 3413 Variable *T = nullptr; |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3325 // .LABEL: | 3447 // .LABEL: |
| 3326 // mov <reg>, eax | 3448 // mov <reg>, eax |
| 3327 // op <reg>, [desired_adj] | 3449 // op <reg>, [desired_adj] |
| 3328 // lock cmpxchg [ptr], <reg> | 3450 // lock cmpxchg [ptr], <reg> |
| 3329 // jne .LABEL | 3451 // jne .LABEL |
| 3330 // mov <dest>, eax | 3452 // mov <dest>, eax |
| 3331 // | 3453 // |
| 3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. | 3454 // If Op_{Lo,Hi} are nullptr, then just copy the value. |
| 3333 Val = legalize(Val); | 3455 Val = legalize(Val); |
| 3334 Type Ty = Val->getType(); | 3456 Type Ty = Val->getType(); |
| 3335 if (Ty == IceType_i64) { | 3457 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | 3458 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 3459 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3460 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3339 _mov(T_eax, loOperand(Addr)); | 3461 _mov(T_eax, loOperand(Addr)); |
| 3340 _mov(T_edx, hiOperand(Addr)); | 3462 _mov(T_edx, hiOperand(Addr)); |
| 3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 3463 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); | 3464 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3343 typename Traits::Insts::Label *Label = | 3465 typename Traits::Insts::Label *Label = |
| 3344 Traits::Insts::Label::create(Func, this); | 3466 Traits::Insts::Label::create(Func, this); |
| 3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | 3467 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3457 if (Cttz) { | 3579 if (Cttz) { |
| 3458 _mov(T_Dest, ThirtyTwo); | 3580 _mov(T_Dest, ThirtyTwo); |
| 3459 } else { | 3581 } else { |
| 3460 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3582 Constant *SixtyThree = Ctx->getConstantInt32(63); |
| 3461 _mov(T_Dest, SixtyThree); | 3583 _mov(T_Dest, SixtyThree); |
| 3462 } | 3584 } |
| 3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); | 3585 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
| 3464 if (!Cttz) { | 3586 if (!Cttz) { |
| 3465 _xor(T_Dest, ThirtyOne); | 3587 _xor(T_Dest, ThirtyOne); |
| 3466 } | 3588 } |
| 3467 if (Ty == IceType_i32) { | 3589 if (Traits::Is64Bit || Ty == IceType_i32) { |
| 3468 _mov(Dest, T_Dest); | 3590 _mov(Dest, T_Dest); |
| 3469 return; | 3591 return; |
| 3470 } | 3592 } |
| 3471 _add(T_Dest, ThirtyTwo); | 3593 _add(T_Dest, ThirtyTwo); |
| 3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3594 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3595 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3474 // Will be using "test" on this, so we need a registerized variable. | 3596 // Will be using "test" on this, so we need a registerized variable. |
| 3475 Variable *SecondVar = legalizeToReg(SecondVal); | 3597 Variable *SecondVar = legalizeToReg(SecondVal); |
| 3476 Variable *T_Dest2 = makeReg(IceType_i32); | 3598 Variable *T_Dest2 = makeReg(IceType_i32); |
| 3477 if (Cttz) { | 3599 if (Cttz) { |
| (...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3884 // Index is Index=Var-Const ==> | 4006 // Index is Index=Var-Const ==> |
| 3885 // set Index=Var, Offset-=(Const<<Shift) | 4007 // set Index=Var, Offset-=(Const<<Shift) |
| 3886 | 4008 |
| 3887 // TODO: consider overflow issues with respect to Offset. | 4009 // TODO: consider overflow issues with respect to Offset. |
| 3888 // TODO: handle symbolic constants. | 4010 // TODO: handle symbolic constants. |
| 3889 } | 4011 } |
| 3890 } | 4012 } |
| 3891 | 4013 |
| 3892 template <class Machine> | 4014 template <class Machine> |
| 3893 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { | 4015 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { |
| 3894 // A Load instruction can be treated the same as an Assign instruction, after | 4016 // A Load instruction can be treated the same as an Assign instruction, |
|
Jim Stichnoth
2015/08/11 16:01:37
formatting
John
2015/08/12 19:27:55
Done.
| |
| 4017 // after | |
| 3895 // the source operand is transformed into a Traits::X86OperandMem operand. | 4018 // the source operand is transformed into a Traits::X86OperandMem operand. |
| 3896 // Note that the address mode optimization already creates an | 4019 // Note that the address mode optimization already creates an |
| 3897 // Traits::X86OperandMem operand, so it doesn't need another level of | 4020 // Traits::X86OperandMem operand, so it doesn't need another level of |
| 3898 // transformation. | 4021 // transformation. |
| 3899 Variable *DestLoad = Load->getDest(); | 4022 Variable *DestLoad = Load->getDest(); |
| 3900 Type Ty = DestLoad->getType(); | 4023 Type Ty = DestLoad->getType(); |
| 3901 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 4024 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 3902 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 4025 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 3903 lowerAssign(Assign); | 4026 lowerAssign(Assign); |
| 3904 } | 4027 } |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4046 return; | 4169 return; |
| 4047 } | 4170 } |
| 4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4171 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| 4049 // But if SrcT is immediate, we might be able to do better, as | 4172 // But if SrcT is immediate, we might be able to do better, as |
| 4050 // the cmov instruction doesn't allow an immediate operand: | 4173 // the cmov instruction doesn't allow an immediate operand: |
| 4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4174 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
| 4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4175 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
| 4053 std::swap(SrcT, SrcF); | 4176 std::swap(SrcT, SrcF); |
| 4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4177 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); |
| 4055 } | 4178 } |
| 4056 if (DestTy == IceType_i64) { | 4179 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 4057 SrcT = legalizeUndef(SrcT); | 4180 SrcT = legalizeUndef(SrcT); |
| 4058 SrcF = legalizeUndef(SrcF); | 4181 SrcF = legalizeUndef(SrcF); |
| 4059 // Set the low portion. | 4182 // Set the low portion. |
| 4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4183 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 4061 Variable *TLo = nullptr; | 4184 Variable *TLo = nullptr; |
| 4062 Operand *SrcFLo = legalize(loOperand(SrcF)); | 4185 Operand *SrcFLo = legalize(loOperand(SrcF)); |
| 4063 _mov(TLo, SrcFLo); | 4186 _mov(TLo, SrcFLo); |
| 4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); | 4187 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); |
| 4065 _cmov(TLo, SrcTLo, Cond); | 4188 _cmov(TLo, SrcTLo, Cond); |
| 4066 _mov(DestLo, TLo); | 4189 _mov(DestLo, TLo); |
| 4067 // Set the high portion. | 4190 // Set the high portion. |
| 4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4191 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 4069 Variable *THi = nullptr; | 4192 Variable *THi = nullptr; |
| 4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); | 4193 Operand *SrcFHi = legalize(hiOperand(SrcF)); |
| 4071 _mov(THi, SrcFHi); | 4194 _mov(THi, SrcFHi); |
| 4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); | 4195 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); |
| 4073 _cmov(THi, SrcTHi, Cond); | 4196 _cmov(THi, SrcTHi, Cond); |
| 4074 _mov(DestHi, THi); | 4197 _mov(DestHi, THi); |
| 4075 return; | 4198 return; |
| 4076 } | 4199 } |
| 4077 | 4200 |
| 4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); | 4201 assert(DestTy == IceType_i16 || DestTy == IceType_i32 || |
| 4202 (Traits::Is64Bit && DestTy == IceType_i64)); | |
| 4079 Variable *T = nullptr; | 4203 Variable *T = nullptr; |
| 4080 SrcF = legalize(SrcF); | 4204 SrcF = legalize(SrcF); |
| 4081 _mov(T, SrcF); | 4205 _mov(T, SrcF); |
| 4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | 4206 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4083 _cmov(T, SrcT, Cond); | 4207 _cmov(T, SrcT, Cond); |
| 4084 _mov(Dest, T); | 4208 _mov(Dest, T); |
| 4085 } | 4209 } |
| 4086 | 4210 |
| 4087 template <class Machine> | 4211 template <class Machine> |
| 4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { | 4212 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { |
| 4089 Operand *Value = Inst->getData(); | 4213 Operand *Value = Inst->getData(); |
| 4090 Operand *Addr = Inst->getAddr(); | 4214 Operand *Addr = Inst->getAddr(); |
| 4091 typename Traits::X86OperandMem *NewAddr = | 4215 typename Traits::X86OperandMem *NewAddr = |
| 4092 formMemoryOperand(Addr, Value->getType()); | 4216 formMemoryOperand(Addr, Value->getType()); |
| 4093 Type Ty = NewAddr->getType(); | 4217 Type Ty = NewAddr->getType(); |
| 4094 | 4218 |
| 4095 if (Ty == IceType_i64) { | 4219 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 4096 Value = legalizeUndef(Value); | 4220 Value = legalizeUndef(Value); |
| 4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | 4221 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); |
| 4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | 4222 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); |
| 4099 _store(ValueHi, | 4223 _store(ValueHi, |
| 4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); | 4224 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); |
| 4101 _store(ValueLo, | 4225 _store(ValueLo, |
| 4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); | 4226 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); |
| 4103 } else if (isVectorType(Ty)) { | 4227 } else if (isVectorType(Ty)) { |
| 4104 _storep(legalizeToReg(Value), NewAddr); | 4228 _storep(legalizeToReg(Value), NewAddr); |
| 4105 } else { | 4229 } else { |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | 4257 NewStore->setRmwBeacon(Inst->getRmwBeacon()); |
| 4134 Context.insert(NewStore); | 4258 Context.insert(NewStore); |
| 4135 } | 4259 } |
| 4136 } | 4260 } |
| 4137 | 4261 |
| 4138 template <class Machine> | 4262 template <class Machine> |
| 4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, | 4263 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, |
| 4140 uint64_t Min, uint64_t Max) { | 4264 uint64_t Min, uint64_t Max) { |
| 4141 // TODO(ascull): 64-bit should not reach here but only because it is not | 4265 // TODO(ascull): 64-bit should not reach here but only because it is not |
| 4142 // implemented yet. This should be able to handle the 64-bit case. | 4266 // implemented yet. This should be able to handle the 64-bit case. |
| 4143 assert(Comparison->getType() != IceType_i64); | 4267 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64); |
| 4144 // Subtracting 0 is a nop so don't do it | 4268 // Subtracting 0 is a nop so don't do it |
| 4145 if (Min != 0) { | 4269 if (Min != 0) { |
| 4146 // Avoid clobbering the comparison by copying it | 4270 // Avoid clobbering the comparison by copying it |
| 4147 Variable *T = nullptr; | 4271 Variable *T = nullptr; |
| 4148 _mov(T, Comparison); | 4272 _mov(T, Comparison); |
| 4149 _sub(T, Ctx->getConstantInt32(Min)); | 4273 _sub(T, Ctx->getConstantInt32(Min)); |
| 4150 Comparison = T; | 4274 Comparison = T; |
| 4151 } | 4275 } |
| 4152 | 4276 |
| 4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 4277 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4232 | 4356 |
| 4233 template <class Machine> | 4357 template <class Machine> |
| 4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 4358 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { |
| 4235 // Group cases together and navigate through them with a binary search | 4359 // Group cases together and navigate through them with a binary search |
| 4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 4360 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 4237 Operand *Src0 = Inst->getComparison(); | 4361 Operand *Src0 = Inst->getComparison(); |
| 4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); | 4362 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 4239 | 4363 |
| 4240 assert(CaseClusters.size() != 0); // Should always be at least one | 4364 assert(CaseClusters.size() != 0); // Should always be at least one |
| 4241 | 4365 |
| 4242 if (Src0->getType() == IceType_i64) { | 4366 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) { |
| 4243 Src0 = legalize(Src0); // get Base/Index into physical registers | 4367 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4244 Operand *Src0Lo = loOperand(Src0); | 4368 Operand *Src0Lo = loOperand(Src0); |
| 4245 Operand *Src0Hi = hiOperand(Src0); | 4369 Operand *Src0Hi = hiOperand(Src0); |
| 4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 4370 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 4247 // TODO(ascull): handle 64-bit case properly (currently naive version) | 4371 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 4248 // This might be handled by a higher level lowering of switches. | 4372 // This might be handled by a higher level lowering of switches. |
| 4249 SizeT NumCases = Inst->getNumCases(); | 4373 SizeT NumCases = Inst->getNumCases(); |
| 4250 if (NumCases >= 2) { | 4374 if (NumCases >= 2) { |
| 4251 Src0Lo = legalizeToReg(Src0Lo); | 4375 Src0Lo = legalizeToReg(Src0Lo); |
| 4252 Src0Hi = legalizeToReg(Src0Hi); | 4376 Src0Hi = legalizeToReg(Src0Hi); |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4437 // that follows. This means that the original Store instruction is | 4561 // that follows. This means that the original Store instruction is |
| 4438 // still there, either because the value being stored is used beyond | 4562 // still there, either because the value being stored is used beyond |
| 4439 // the Store instruction, or because dead code elimination did not | 4563 // the Store instruction, or because dead code elimination did not |
| 4440 // happen. In either case, we cancel RMW lowering (and the caller | 4564 // happen. In either case, we cancel RMW lowering (and the caller |
| 4441 // deletes the RMW instruction). | 4565 // deletes the RMW instruction). |
| 4442 if (!RMW->isLastUse(RMW->getBeacon())) | 4566 if (!RMW->isLastUse(RMW->getBeacon())) |
| 4443 return; | 4567 return; |
| 4444 Operand *Src = RMW->getData(); | 4568 Operand *Src = RMW->getData(); |
| 4445 Type Ty = Src->getType(); | 4569 Type Ty = Src->getType(); |
| 4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | 4570 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
| 4447 if (Ty == IceType_i64) { | 4571 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 4448 Src = legalizeUndef(Src); | 4572 Src = legalizeUndef(Src); |
| 4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | 4573 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); |
| 4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | 4574 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); |
| 4451 typename Traits::X86OperandMem *AddrLo = | 4575 typename Traits::X86OperandMem *AddrLo = |
| 4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); | 4576 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); |
| 4453 typename Traits::X86OperandMem *AddrHi = | 4577 typename Traits::X86OperandMem *AddrHi = |
| 4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); | 4578 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); |
| 4455 switch (RMW->getOp()) { | 4579 switch (RMW->getOp()) { |
| 4456 default: | 4580 default: |
| 4457 // TODO(stichnot): Implement other arithmetic operators. | 4581 // TODO(stichnot): Implement other arithmetic operators. |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 4471 case InstArithmetic::Or: | 4595 case InstArithmetic::Or: |
| 4472 _or_rmw(AddrLo, SrcLo); | 4596 _or_rmw(AddrLo, SrcLo); |
| 4473 _or_rmw(AddrHi, SrcHi); | 4597 _or_rmw(AddrHi, SrcHi); |
| 4474 return; | 4598 return; |
| 4475 case InstArithmetic::Xor: | 4599 case InstArithmetic::Xor: |
| 4476 _xor_rmw(AddrLo, SrcLo); | 4600 _xor_rmw(AddrLo, SrcLo); |
| 4477 _xor_rmw(AddrHi, SrcHi); | 4601 _xor_rmw(AddrHi, SrcHi); |
| 4478 return; | 4602 return; |
| 4479 } | 4603 } |
| 4480 } else { | 4604 } else { |
| 4481 // i8, i16, i32 | 4605 // x86-32: i8, i16, i32 |
| 4606 // x86-64: i8, i16, i32, i64 | |
| 4482 switch (RMW->getOp()) { | 4607 switch (RMW->getOp()) { |
| 4483 default: | 4608 default: |
| 4484 // TODO(stichnot): Implement other arithmetic operators. | 4609 // TODO(stichnot): Implement other arithmetic operators. |
| 4485 break; | 4610 break; |
| 4486 case InstArithmetic::Add: | 4611 case InstArithmetic::Add: |
| 4487 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4612 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| 4488 _add_rmw(Addr, Src); | 4613 _add_rmw(Addr, Src); |
| 4489 return; | 4614 return; |
| 4490 case InstArithmetic::Sub: | 4615 case InstArithmetic::Sub: |
| 4491 Src = legalize(Src, Legal_Reg | Legal_Imm); | 4616 Src = legalize(Src, Legal_Reg | Legal_Imm); |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 4516 } else { | 4641 } else { |
| 4517 TargetLowering::lowerOther(Instr); | 4642 TargetLowering::lowerOther(Instr); |
| 4518 } | 4643 } |
| 4519 } | 4644 } |
| 4520 | 4645 |
| 4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4646 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4522 /// preserve integrity of liveness analysis. Undef values are also | 4647 /// preserve integrity of liveness analysis. Undef values are also |
| 4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect | 4648 /// turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4524 /// Undef input. | 4649 /// Undef input. |
| 4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4650 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4526 // Pause constant blinding or pooling, blinding or pooling will be done later | 4651 if (Traits::Is64Bit) { |
| 4652 // On x86-64 we don't need to prelower phis -- the architecture can handle | |
 | 4653 // 64-bit integers natively. | |
| 4654 return; | |
| 4655 } | |
| 4656 | |
| 4657 // Pause constant blinding or pooling, blinding or pooling will be done | |
| 4658 // later | |
|
Jim Stichnoth
2015/08/11 16:01:37
formatting
John
2015/08/12 19:27:55
Done.
| |
| 4527 // during phi lowering assignments | 4659 // during phi lowering assignments |
| 4528 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4660 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 4661 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( |
| 4530 this, Context.getNode(), Func); | 4662 this, Context.getNode(), Func); |
| 4531 } | 4663 } |
| 4532 | 4664 |
| 4533 // There is no support for loading or emitting vector constants, so the | 4665 // There is no support for loading or emitting vector constants, so the |
| 4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 4666 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| 4535 // etc. are initialized with register operations. | 4667 // etc. are initialized with register operations. |
| 4536 // | 4668 // |
| (...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { | 4810 if (auto *Const = llvm::dyn_cast<Constant>(From)) { |
| 4679 if (llvm::isa<ConstantUndef>(Const)) { | 4811 if (llvm::isa<ConstantUndef>(Const)) { |
| 4680 From = legalizeUndef(Const, RegNum); | 4812 From = legalizeUndef(Const, RegNum); |
| 4681 if (isVectorType(Ty)) | 4813 if (isVectorType(Ty)) |
| 4682 return From; | 4814 return From; |
| 4683 Const = llvm::cast<Constant>(From); | 4815 Const = llvm::cast<Constant>(From); |
| 4684 } | 4816 } |
| 4685 // There should be no constants of vector type (other than undef). | 4817 // There should be no constants of vector type (other than undef). |
| 4686 assert(!isVectorType(Ty)); | 4818 assert(!isVectorType(Ty)); |
| 4687 | 4819 |
| 4820 // If the operand is a 64 bit constant integer we need to legalize it to a | |
| 4821 // register in x86-64. | |
| 4822 if (Traits::Is64Bit) { | |
| 4823 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Const)) { | |
| 4824 Variable *V = copyToReg(C, RegNum); | |
| 4825 V->setWeightInfinite(); | |
| 4826 return V; | |
| 4827 } | |
| 4828 } | |
| 4829 | |
 | 4688 // If the operand is a 32 bit constant integer, we should check | 4830 // If the operand is a 32 bit constant integer, we should check |
| 4689 // whether we need to randomize it or pool it. | 4831 // whether we need to randomize it or pool it. |
| 4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { | 4832 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { |
| 4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); | 4833 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); |
| 4692 if (NewConst != Const) { | 4834 if (NewConst != Const) { |
| 4693 return NewConst; | 4835 return NewConst; |
| 4694 } | 4836 } |
| 4695 } | 4837 } |
| 4696 | 4838 |
| 4697 // Convert a scalar floating point constant into an explicit | 4839 // Convert a scalar floating point constant into an explicit |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4815 } | 4957 } |
| 4816 // Do legalization, which contains randomization/pooling | 4958 // Do legalization, which contains randomization/pooling |
| 4817 // or do randomization/pooling. | 4959 // or do randomization/pooling. |
| 4818 return llvm::cast<typename Traits::X86OperandMem>( | 4960 return llvm::cast<typename Traits::X86OperandMem>( |
| 4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 4961 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
| 4820 } | 4962 } |
| 4821 | 4963 |
| 4822 template <class Machine> | 4964 template <class Machine> |
| 4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | 4965 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { |
| 4824 // There aren't any 64-bit integer registers for x86-32. | 4966 // There aren't any 64-bit integer registers for x86-32. |
| 4825 assert(Type != IceType_i64); | 4967 assert(Traits::Is64Bit || Type != IceType_i64); |
| 4826 Variable *Reg = Func->makeVariable(Type); | 4968 Variable *Reg = Func->makeVariable(Type); |
| 4827 if (RegNum == Variable::NoRegister) | 4969 if (RegNum == Variable::NoRegister) |
| 4828 Reg->setWeightInfinite(); | 4970 Reg->setWeightInfinite(); |
| 4829 else | 4971 else |
| 4830 Reg->setRegNum(RegNum); | 4972 Reg->setRegNum(RegNum); |
| 4831 return Reg; | 4973 return Reg; |
| 4832 } | 4974 } |
| 4833 | 4975 |
| 4834 template <class Machine> void TargetX86Base<Machine>::postLower() { | 4976 template <class Machine> void TargetX86Base<Machine>::postLower() { |
| 4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) | 4977 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 4847 | 4989 |
| 4848 template <class Machine> | 4990 template <class Machine> |
| 4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { | 4991 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { |
| 4850 if (!BuildDefs::dump()) | 4992 if (!BuildDefs::dump()) |
| 4851 return; | 4993 return; |
| 4852 Ostream &Str = Ctx->getStrEmit(); | 4994 Ostream &Str = Ctx->getStrEmit(); |
| 4853 Str << getConstantPrefix() << C->getValue(); | 4995 Str << getConstantPrefix() << C->getValue(); |
| 4854 } | 4996 } |
| 4855 | 4997 |
| 4856 template <class Machine> | 4998 template <class Machine> |
| 4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { | 4999 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const { |
| 4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | 5000 if (!Traits::Is64Bit) { |
| 5001 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | |
| 5002 } else { | |
| 5003 if (!BuildDefs::dump()) | |
| 5004 return; | |
| 5005 Ostream &Str = Ctx->getStrEmit(); | |
| 5006 Str << getConstantPrefix() << C->getValue(); | |
| 5007 } | |
| 4859 } | 5008 } |
| 4860 | 5009 |
| 4861 template <class Machine> | 5010 template <class Machine> |
| 4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { | 5011 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { |
| 4863 if (!BuildDefs::dump()) | 5012 if (!BuildDefs::dump()) |
| 4864 return; | 5013 return; |
| 4865 Ostream &Str = Ctx->getStrEmit(); | 5014 Ostream &Str = Ctx->getStrEmit(); |
| 4866 C->emitPoolLabel(Str); | 5015 C->emitPoolLabel(Str); |
| 4867 } | 5016 } |
| 4868 | 5017 |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4993 Constant *Mask1 = Ctx->getConstantInt( | 5142 Constant *Mask1 = Ctx->getConstantInt( |
| 4994 MemOperand->getOffset()->getType(), Cookie + Value); | 5143 MemOperand->getOffset()->getType(), Cookie + Value); |
| 4995 Constant *Mask2 = | 5144 Constant *Mask2 = |
| 4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 5145 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); |
| 4997 | 5146 |
| 4998 typename Traits::X86OperandMem *TempMemOperand = | 5147 typename Traits::X86OperandMem *TempMemOperand = |
| 4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), | 5148 Traits::X86OperandMem::create(Func, MemOperand->getType(), |
| 5000 MemOperand->getBase(), Mask1); | 5149 MemOperand->getBase(), Mask1); |
| 5001 // If we have already assigned a physical register, we must come from | 5150 // If we have already assigned a physical register, we must come from |
| 5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5151 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
 | 5003 // the assigned register as this assignment is the start of its use-def | 5152 // the assigned register as this assignment is the start of its |
| 5153 // use-def | |
|
Jim Stichnoth
2015/08/11 16:01:37
formatting
John
2015/08/12 19:27:55
Done.
| |
| 5004 // chain. So we add RegNum argument here. | 5154 // chain. So we add RegNum argument here. |
| 5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 5155 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
| 5006 _lea(RegTemp, TempMemOperand); | 5156 _lea(RegTemp, TempMemOperand); |
| 5007 // As source operand doesn't use the dstreg, we don't need to add | 5157 // As source operand doesn't use the dstreg, we don't need to add |
| 5008 // _set_dest_nonkillable(). | 5158 // _set_dest_nonkillable(). |
| 5009 // But if we use the same Dest Reg, that is, with RegNum | 5159 // But if we use the same Dest Reg, that is, with RegNum |
| 5010 // assigned, we should add this _set_dest_nonkillable() | 5160 // assigned, we should add this _set_dest_nonkillable() |
| 5011 if (RegNum != Variable::NoRegister) | 5161 if (RegNum != Variable::NoRegister) |
| 5012 _set_dest_nonkillable(); | 5162 _set_dest_nonkillable(); |
| 5013 | 5163 |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5077 } | 5227 } |
| 5078 // the offset is not eligible for blinding or pooling, return the original | 5228 // the offset is not eligible for blinding or pooling, return the original |
| 5079 // mem operand | 5229 // mem operand |
| 5080 return MemOperand; | 5230 return MemOperand; |
| 5081 } | 5231 } |
| 5082 | 5232 |
| 5083 } // end of namespace X86Internal | 5233 } // end of namespace X86Internal |
| 5084 } // end of namespace Ice | 5234 } // end of namespace Ice |
| 5085 | 5235 |
| 5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5236 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |