Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(90)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1273153002: Subzero. Native 64-bit int arithmetic on x86-64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
70 // NumUses counts the number of times Var is used as a source operand in the 70 // NumUses counts the number of times Var is used as a source operand in the
71 // basic block. If IsComplex is true and there is more than one use of Var, 71 // basic block. If IsComplex is true and there is more than one use of Var,
72 // then the folding optimization is disabled for Var. 72 // then the folding optimization is disabled for Var.
73 uint32_t NumUses = 0; 73 uint32_t NumUses = 0;
74 }; 74 };
75 75
76 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
77 public: 77 public:
78 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
79 PK_None, 79 PK_None,
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
80 PK_Icmp32, 81 PK_Icmp32,
81 PK_Icmp64, 82 PK_Icmp64,
82 PK_Fcmp, 83 PK_Fcmp,
83 PK_Trunc 84 PK_Trunc
84 }; 85 };
85 86
86 /// Currently the actual enum values are not used (other than CK_None), but we 87 /// Currently the actual enum values are not used (other than CK_None), but we
87 /// go ahead and produce them anyway for symmetry with the 88 /// go ahead and produce them anyway for symmetry with the
88 /// BoolFoldingProducerKind. 89 /// BoolFoldingProducerKind.
89 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
(...skipping 23 matching lines...) Expand all
113 }; 114 };
114 115
115 template <class MachineTraits> 116 template <class MachineTraits>
116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
118 119
119 template <class MachineTraits> 120 template <class MachineTraits>
120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
122 if (llvm::isa<InstIcmp>(Instr)) { 123 if (llvm::isa<InstIcmp>(Instr)) {
123 if (Instr->getSrc(0)->getType() != IceType_i64) 124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
124 return PK_Icmp32; 125 return PK_Icmp32;
125 return PK_None; // TODO(stichnot): actually PK_Icmp64; 126 return PK_None; // TODO(stichnot): actually PK_Icmp64;
126 } 127 }
127 return PK_None; // TODO(stichnot): remove this 128 return PK_None; // TODO(stichnot): remove this
128 129
129 if (llvm::isa<InstFcmp>(Instr)) 130 if (llvm::isa<InstFcmp>(Instr))
130 return PK_Fcmp; 131 return PK_Fcmp;
131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
132 switch (Cast->getCastKind()) { 133 switch (Cast->getCastKind()) {
133 default: 134 default:
(...skipping 502 matching lines...) Expand 10 before | Expand all | Expand 10 after
636 // instruction or equivalent. 637 // instruction or equivalent.
637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 638 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
638 // An InstLoad always qualifies. 639 // An InstLoad always qualifies.
639 LoadDest = Load->getDest(); 640 LoadDest = Load->getDest();
640 const bool DoLegalize = false; 641 const bool DoLegalize = false;
641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 642 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
642 LoadDest->getType(), DoLegalize); 643 LoadDest->getType(), DoLegalize);
643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 644 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
644 // An AtomicLoad intrinsic qualifies as long as it has a valid 645 // An AtomicLoad intrinsic qualifies as long as it has a valid
645 // memory ordering, and can be implemented in a single 646 // memory ordering, and can be implemented in a single
646 // instruction (i.e., not i64). 647 // instruction (i.e., not i64 on x86-32).
647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 648 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
648 if (ID == Intrinsics::AtomicLoad && 649 if (ID == Intrinsics::AtomicLoad &&
649 Intrin->getDest()->getType() != IceType_i64 && 650 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
650 Intrinsics::isMemoryOrderValid( 651 Intrinsics::isMemoryOrderValid(
651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 652 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
652 LoadDest = Intrin->getDest(); 653 LoadDest = Intrin->getDest();
653 const bool DoLegalize = false; 654 const bool DoLegalize = false;
654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 655 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
655 DoLegalize); 656 DoLegalize);
656 } 657 }
657 } 658 }
658 // A Load instruction can be folded into the following 659 // A Load instruction can be folded into the following
659 // instruction only if the following instruction ends the Load's 660 // instruction only if the following instruction ends the Load's
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
717 template <class Machine> 718 template <class Machine>
718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 719 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 720 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
720 return Br->optimizeBranch(NextNode); 721 return Br->optimizeBranch(NextNode);
721 } 722 }
722 return false; 723 return false;
723 } 724 }
724 725
725 template <class Machine> 726 template <class Machine>
726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 727 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
728 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
729 if (RegNum == Traits::RegisterSet::Reg_esp ||
730 RegNum == Traits::RegisterSet::Reg_ebp)
731 Ty = Traits::WordType;
727 if (Ty == IceType_void) 732 if (Ty == IceType_void)
728 Ty = IceType_i32; 733 Ty = IceType_i32;
729 if (PhysicalRegisters[Ty].empty()) 734 if (PhysicalRegisters[Ty].empty())
730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 735 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
731 assert(RegNum < PhysicalRegisters[Ty].size()); 736 assert(RegNum < PhysicalRegisters[Ty].size());
732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 737 Variable *Reg = PhysicalRegisters[Ty][RegNum];
733 if (Reg == nullptr) { 738 if (Reg == nullptr) {
734 Reg = Func->makeVariable(Ty); 739 Reg = Func->makeVariable(Ty);
735 Reg->setRegNum(RegNum); 740 Reg->setRegNum(RegNum);
736 PhysicalRegisters[Ty][RegNum] = Reg; 741 PhysicalRegisters[Ty][RegNum] = Reg;
(...skipping 26 matching lines...) Expand all
763 } 768 }
764 int32_t Offset = Var->getStackOffset(); 769 int32_t Offset = Var->getStackOffset();
765 int32_t BaseRegNum = Var->getBaseRegNum(); 770 int32_t BaseRegNum = Var->getBaseRegNum();
766 if (BaseRegNum == Variable::NoRegister) { 771 if (BaseRegNum == Variable::NoRegister) {
767 BaseRegNum = getFrameOrStackReg(); 772 BaseRegNum = getFrameOrStackReg();
768 if (!hasFramePointer()) 773 if (!hasFramePointer())
769 Offset += getStackAdjustment(); 774 Offset += getStackAdjustment();
770 } 775 }
771 if (Offset) 776 if (Offset)
772 Str << Offset; 777 Str << Offset;
773 const Type FrameSPTy = IceType_i32; 778 const Type FrameSPTy = Traits::WordType;
774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 779 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
775 } 780 }
776 781
777 template <class Machine> 782 template <class Machine>
778 typename TargetX86Base<Machine>::Traits::Address 783 typename TargetX86Base<Machine>::Traits::Address
779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 784 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
780 if (Var->hasReg()) 785 if (Var->hasReg())
781 llvm_unreachable("Stack Variable has a register assigned"); 786 llvm_unreachable("Stack Variable has a register assigned");
782 if (Var->getWeight().isInf()) { 787 if (Var->getWeight().isInf()) {
783 llvm_unreachable("Infinite-weight Variable has no register assigned"); 788 llvm_unreachable("Infinite-weight Variable has no register assigned");
(...skipping 19 matching lines...) Expand all
803 /// function generates an instruction to copy Arg into its assigned 808 /// function generates an instruction to copy Arg into its assigned
804 /// register if applicable. 809 /// register if applicable.
805 template <class Machine> 810 template <class Machine>
806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 811 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
807 Variable *FramePtr, 812 Variable *FramePtr,
808 size_t BasicFrameOffset, 813 size_t BasicFrameOffset,
809 size_t &InArgsSizeBytes) { 814 size_t &InArgsSizeBytes) {
810 Variable *Lo = Arg->getLo(); 815 Variable *Lo = Arg->getLo();
811 Variable *Hi = Arg->getHi(); 816 Variable *Hi = Arg->getHi();
812 Type Ty = Arg->getType(); 817 Type Ty = Arg->getType();
813 if (Lo && Hi && Ty == IceType_i64) { 818 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
814 // TODO(jpp): This special case is not needed for x86-64.
815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 819 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 820 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 821 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 822 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
819 return; 823 return;
820 } 824 }
821 if (isVectorType(Ty)) { 825 if (isVectorType(Ty)) {
822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 826 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
823 } 827 }
824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 828 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 829 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
826 if (Arg->hasReg()) { 830 if (Arg->hasReg()) {
827 assert(Ty != IceType_i64); 831 assert(Ty != IceType_i64 || Traits::Is64Bit);
828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 832 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 833 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
830 if (isVectorType(Arg->getType())) { 834 if (isVectorType(Arg->getType())) {
831 _movp(Arg, Mem); 835 _movp(Arg, Mem);
832 } else { 836 } else {
833 _mov(Arg, Mem); 837 _mov(Arg, Mem);
834 } 838 }
835 // This argument-copying instruction uses an explicit Traits::X86OperandMem 839 // This argument-copying instruction uses an explicit Traits::X86OperandMem
836 // operand instead of a Variable, so its fill-from-stack operation has to be 840 // operand instead of a Variable, so its fill-from-stack operation has to be
837 // tracked separately for statistics. 841 // tracked separately for statistics.
838 Ctx->statsUpdateFills(); 842 Ctx->statsUpdateFills();
839 } 843 }
840 } 844 }
841 845
842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 846 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
843 // TODO(jpp): this is wrong for x86-64. 847 return Traits::WordType;
844 return IceType_i32;
845 } 848 }
846 849
847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { 850 template <class Machine>
851 template <typename T>
852 typename std::enable_if<!T::Is64Bit, void>::type
853 TargetX86Base<Machine>::split64(Variable *Var) {
848 switch (Var->getType()) { 854 switch (Var->getType()) {
849 default: 855 default:
850 return; 856 return;
851 case IceType_i64: 857 case IceType_i64:
852 // TODO: Only consider F64 if we need to push each half when 858 // TODO: Only consider F64 if we need to push each half when
853 // passing as an argument to a function call. Note that each half 859 // passing as an argument to a function call. Note that each half
854 // is still typed as I32. 860 // is still typed as I32.
855 case IceType_f64: 861 case IceType_f64:
856 break; 862 break;
857 } 863 }
(...skipping 11 matching lines...) Expand all
869 Hi->setName(Func, Var->getName(Func) + "__hi"); 875 Hi->setName(Func, Var->getName(Func) + "__hi");
870 } 876 }
871 Var->setLoHi(Lo, Hi); 877 Var->setLoHi(Lo, Hi);
872 if (Var->getIsArg()) { 878 if (Var->getIsArg()) {
873 Lo->setIsArg(); 879 Lo->setIsArg();
874 Hi->setIsArg(); 880 Hi->setIsArg();
875 } 881 }
876 } 882 }
877 883
878 template <class Machine> 884 template <class Machine>
879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { 885 template <typename T>
886 typename std::enable_if<!T::Is64Bit, Operand>::type *
887 TargetX86Base<Machine>::loOperand(Operand *Operand) {
880 assert(Operand->getType() == IceType_i64 || 888 assert(Operand->getType() == IceType_i64 ||
881 Operand->getType() == IceType_f64); 889 Operand->getType() == IceType_f64);
882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 890 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
883 return Operand; 891 return Operand;
884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 892 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
885 split64(Var); 893 split64(Var);
886 return Var->getLo(); 894 return Var->getLo();
887 } 895 }
888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 896 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 897 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 898 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
891 // Check if we need to blind/pool the constant. 899 // Check if we need to blind/pool the constant.
892 return legalize(ConstInt); 900 return legalize(ConstInt);
893 } 901 }
894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 902 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
895 auto *MemOperand = Traits::X86OperandMem::create( 903 auto *MemOperand = Traits::X86OperandMem::create(
896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 904 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
897 Mem->getShift(), Mem->getSegmentRegister()); 905 Mem->getShift(), Mem->getSegmentRegister());
898 // Test if we should randomize or pool the offset, if so randomize it or 906 // Test if we should randomize or pool the offset, if so randomize it or
899 // pool it then create mem operand with the blinded/pooled constant. 907 // pool it then create mem operand with the blinded/pooled constant.
900 // Otherwise, return the mem operand as ordinary mem operand. 908 // Otherwise, return the mem operand as ordinary mem operand.
901 return legalize(MemOperand); 909 return legalize(MemOperand);
902 } 910 }
903 llvm_unreachable("Unsupported operand type"); 911 llvm_unreachable("Unsupported operand type");
904 return nullptr; 912 return nullptr;
905 } 913 }
906 914
907 template <class Machine> 915 template <class Machine>
908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 916 template <typename T>
917 typename std::enable_if<!T::Is64Bit, Operand>::type *
918 TargetX86Base<Machine>::hiOperand(Operand *Operand) {
909 assert(Operand->getType() == IceType_i64 || 919 assert(Operand->getType() == IceType_i64 ||
910 Operand->getType() == IceType_f64); 920 Operand->getType() == IceType_f64);
911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 921 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
912 return Operand; 922 return Operand;
913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 923 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
914 split64(Var); 924 split64(Var);
915 return Var->getHi(); 925 return Var->getHi();
916 } 926 }
917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 927 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 928 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1110 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1101 Variable *Dest = Inst->getDest(); 1111 Variable *Dest = Inst->getDest();
1102 Operand *Src0 = legalize(Inst->getSrc(0)); 1112 Operand *Src0 = legalize(Inst->getSrc(0));
1103 Operand *Src1 = legalize(Inst->getSrc(1)); 1113 Operand *Src1 = legalize(Inst->getSrc(1));
1104 if (Inst->isCommutative()) { 1114 if (Inst->isCommutative()) {
1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1115 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1106 std::swap(Src0, Src1); 1116 std::swap(Src0, Src1);
1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1117 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1108 std::swap(Src0, Src1); 1118 std::swap(Src0, Src1);
1109 } 1119 }
1110 if (Dest->getType() == IceType_i64) { 1120 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1111 // These helper-call-involved instructions are lowered in this 1121 // These x86-32 helper-call-involved instructions are lowered in this
1112 // separate switch. This is because loOperand() and hiOperand() 1122 // separate switch. This is because loOperand() and hiOperand()
1113 // may insert redundant instructions for constant blinding and 1123 // may insert redundant instructions for constant blinding and
1114 // pooling. Such redundant instructions will fail liveness analysis 1124 // pooling. Such redundant instructions will fail liveness analysis
1115 // under -Om1 setting. And, actually these arguments do not need 1125 // under -Om1 setting. And, actually these arguments do not need
1116 // to be processed with loOperand() and hiOperand() to be used. 1126 // to be processed with loOperand() and hiOperand() to be used.
1117 switch (Inst->getOp()) { 1127 switch (Inst->getOp()) {
1118 case InstArithmetic::Udiv: { 1128 case InstArithmetic::Udiv: {
1119 const SizeT MaxSrcs = 2; 1129 const SizeT MaxSrcs = 2;
1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1130 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1121 Call->addArg(Inst->getSrc(0)); 1131 Call->addArg(Inst->getSrc(0));
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after
1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1659 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1650 // this ever becomes a problem we can introduce a pseudo rem instruction 1660 // this ever becomes a problem we can introduce a pseudo rem instruction
1651 // that returns the remainder in %al directly (and uses a mov for copying 1661 // that returns the remainder in %al directly (and uses a mov for copying
1652 // %ah to %al.) 1662 // %ah to %al.)
1653 static constexpr uint8_t AlSizeInBits = 8; 1663 static constexpr uint8_t AlSizeInBits = 8;
1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1664 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1655 _mov(Dest, T); 1665 _mov(Dest, T);
1656 Context.insert(InstFakeUse::create(Func, T_eax)); 1666 Context.insert(InstFakeUse::create(Func, T_eax));
1657 } else { 1667 } else {
1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1668 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1669 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1670 _mov(T_edx, Zero);
1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1671 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1661 _div(T_edx, Src1, T); 1672 _div(T_edx, Src1, T);
1662 _mov(Dest, T_edx); 1673 _mov(Dest, T_edx);
1663 } 1674 }
1664 break; 1675 break;
1665 case InstArithmetic::Srem: 1676 case InstArithmetic::Srem:
1666 // TODO(stichnot): Enable this after doing better performance 1677 // TODO(stichnot): Enable this after doing better performance
1667 // and cross testing. 1678 // and cross testing.
1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1679 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1669 // Optimize mod by constant power of 2, but not for Om1 or O0, 1680 // Optimize mod by constant power of 2, but not for Om1 or O0,
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1725 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1726 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1716 // this ever becomes a problem we can introduce a pseudo rem instruction 1727 // this ever becomes a problem we can introduce a pseudo rem instruction
1717 // that returns the remainder in %al directly (and uses a mov for copying 1728 // that returns the remainder in %al directly (and uses a mov for copying
1718 // %ah to %al.) 1729 // %ah to %al.)
1719 static constexpr uint8_t AlSizeInBits = 8; 1730 static constexpr uint8_t AlSizeInBits = 8;
1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1731 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1721 _mov(Dest, T); 1732 _mov(Dest, T);
1722 Context.insert(InstFakeUse::create(Func, T_eax)); 1733 Context.insert(InstFakeUse::create(Func, T_eax));
1723 } else { 1734 } else {
1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1735 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1726 _cbwdq(T_edx, T); 1737 _cbwdq(T_edx, T);
1727 _idiv(T_edx, Src1, T); 1738 _idiv(T_edx, Src1, T);
1728 _mov(Dest, T_edx); 1739 _mov(Dest, T_edx);
1729 } 1740 }
1730 break; 1741 break;
1731 case InstArithmetic::Fadd: 1742 case InstArithmetic::Fadd:
1732 _mov(T, Src0); 1743 _mov(T, Src0);
1733 _addss(T, Src1); 1744 _addss(T, Src1);
1734 _mov(Dest, T); 1745 _mov(Dest, T);
(...skipping 23 matching lines...) Expand all
1758 return lowerCall(Call); 1769 return lowerCall(Call);
1759 } 1770 }
1760 } 1771 }
1761 } 1772 }
1762 1773
1763 template <class Machine> 1774 template <class Machine>
1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1775 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1765 Variable *Dest = Inst->getDest(); 1776 Variable *Dest = Inst->getDest();
1766 Operand *Src0 = Inst->getSrc(0); 1777 Operand *Src0 = Inst->getSrc(0);
1767 assert(Dest->getType() == Src0->getType()); 1778 assert(Dest->getType() == Src0->getType());
1768 if (Dest->getType() == IceType_i64) { 1779 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1769 Src0 = legalize(Src0); 1780 Src0 = legalize(Src0);
1770 Operand *Src0Lo = loOperand(Src0); 1781 Operand *Src0Lo = loOperand(Src0);
1771 Operand *Src0Hi = hiOperand(Src0); 1782 Operand *Src0Hi = hiOperand(Src0);
1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1783 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1784 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1785 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1775 _mov(T_Lo, Src0Lo); 1786 _mov(T_Lo, Src0Lo);
1776 _mov(DestLo, T_Lo); 1787 _mov(DestLo, T_Lo);
1777 _mov(T_Hi, Src0Hi); 1788 _mov(T_Hi, Src0Hi);
1778 _mov(DestHi, T_Hi); 1789 _mov(DestHi, T_Hi);
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
1863 SizeT ShiftAmount = 1874 SizeT ShiftAmount =
1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1875 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
1865 1; 1876 1;
1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1877 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1867 Variable *T = makeReg(DestTy); 1878 Variable *T = makeReg(DestTy);
1868 _movp(T, Src0RM); 1879 _movp(T, Src0RM);
1869 _psll(T, ShiftConstant); 1880 _psll(T, ShiftConstant);
1870 _psra(T, ShiftConstant); 1881 _psra(T, ShiftConstant);
1871 _movp(Dest, T); 1882 _movp(Dest, T);
1872 } 1883 }
1873 } else if (Dest->getType() == IceType_i64) { 1884 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1885 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1875 Constant *Shift = Ctx->getConstantInt32(31); 1886 Constant *Shift = Ctx->getConstantInt32(31);
1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1887 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1888 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1878 Variable *T_Lo = makeReg(DestLo->getType()); 1889 Variable *T_Lo = makeReg(DestLo->getType());
1879 if (Src0RM->getType() == IceType_i32) { 1890 if (Src0RM->getType() == IceType_i32) {
1880 _mov(T_Lo, Src0RM); 1891 _mov(T_Lo, Src0RM);
1881 } else if (Src0RM->getType() == IceType_i1) { 1892 } else if (Src0RM->getType() == IceType_i1) {
1882 _movzx(T_Lo, Src0RM); 1893 _movzx(T_Lo, Src0RM);
1883 _shl(T_Lo, Shift); 1894 _shl(T_Lo, Shift);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1923 case InstCast::Zext: { 1934 case InstCast::Zext: {
1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1935 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1925 if (isVectorType(Dest->getType())) { 1936 if (isVectorType(Dest->getType())) {
1926 // onemask = materialize(1,1,...); dest = onemask & src 1937 // onemask = materialize(1,1,...); dest = onemask & src
1927 Type DestTy = Dest->getType(); 1938 Type DestTy = Dest->getType();
1928 Variable *OneMask = makeVectorOfOnes(DestTy); 1939 Variable *OneMask = makeVectorOfOnes(DestTy);
1929 Variable *T = makeReg(DestTy); 1940 Variable *T = makeReg(DestTy);
1930 _movp(T, Src0RM); 1941 _movp(T, Src0RM);
1931 _pand(T, OneMask); 1942 _pand(T, OneMask);
1932 _movp(Dest, T); 1943 _movp(Dest, T);
1933 } else if (Dest->getType() == IceType_i64) { 1944 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1934 // t1=movzx src; dst.lo=t1; dst.hi=0 1945 // t1=movzx src; dst.lo=t1; dst.hi=0
1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1946 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1947 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1948 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1938 Variable *Tmp = makeReg(DestLo->getType()); 1949 Variable *Tmp = makeReg(DestLo->getType());
1939 if (Src0RM->getType() == IceType_i32) { 1950 if (Src0RM->getType() == IceType_i32) {
1940 _mov(Tmp, Src0RM); 1951 _mov(Tmp, Src0RM);
1941 } else { 1952 } else {
1942 _movzx(Tmp, Src0RM); 1953 _movzx(Tmp, Src0RM);
1943 } 1954 }
1944 if (Src0RM->getType() == IceType_i1) { 1955 if (Src0RM->getType() == IceType_i1) {
1945 Constant *One = Ctx->getConstantInt32(1); 1956 Constant *One = Ctx->getConstantInt32(1);
1946 _and(Tmp, One); 1957 _and(Tmp, One);
1947 } 1958 }
1948 _mov(DestLo, Tmp); 1959 _mov(DestLo, Tmp);
1949 _mov(DestHi, Zero); 1960 _mov(DestHi, Zero);
1950 } else if (Src0RM->getType() == IceType_i1) { 1961 } else if (Src0RM->getType() == IceType_i1) {
1951 // t = Src0RM; t &= 1; Dest = t 1962 // t = Src0RM; t &= 1; Dest = t
1952 Constant *One = Ctx->getConstantInt32(1); 1963 Constant *One = Ctx->getConstantInt32(1);
1953 Type DestTy = Dest->getType(); 1964 Type DestTy = Dest->getType();
1954 Variable *T; 1965 Variable *T;
1955 if (DestTy == IceType_i8) { 1966 T = makeReg(IceType_i32);
1956 T = makeReg(DestTy); 1967 _mov(T, Src0RM);
1957 _mov(T, Src0RM); 1968 _and(T, One);
1958 } else { 1969 if (!Traits::Is64Bit) {
1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 1970 assert(DestTy != IceType_i64);
1960 T = makeReg(IceType_i32); 1971 } else if (DestTy == IceType_i64) {
1961 _movzx(T, Src0RM); 1972 // In x86-64 we should be able to rely on mov reg, reg to zero extend T
1973 // into Dest. At this point we can't ensure Dest will live in a
1974 // register. Therefore, we use _movzx, which the assembler rightly
1975 // converts to a 32-bit mov. A new temporary is created because the
1976 // assembler does not know how to movzx to a memory location.
1977 Variable *T_1 = makeReg(IceType_i64);
1978 _movzx(T_1, T);
1979 T = T_1;
1962 } 1980 }
1963 _and(T, One);
1964 _mov(Dest, T); 1981 _mov(Dest, T);
1965 } else { 1982 } else {
1966 // t1 = movzx src; dst = t1 1983 // t1 = movzx src; dst = t1
1967 Variable *T = makeReg(Dest->getType()); 1984 Variable *T = makeReg(Dest->getType());
1968 _movzx(T, Src0RM); 1985 _movzx(T, Src0RM);
1969 _mov(Dest, T); 1986 _mov(Dest, T);
1970 } 1987 }
1971 break; 1988 break;
1972 } 1989 }
1973 case InstCast::Trunc: { 1990 case InstCast::Trunc: {
1974 if (isVectorType(Dest->getType())) { 1991 if (isVectorType(Dest->getType())) {
1975 // onemask = materialize(1,1,...); dst = src & onemask 1992 // onemask = materialize(1,1,...); dst = src & onemask
1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1993 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1977 Type Src0Ty = Src0RM->getType(); 1994 Type Src0Ty = Src0RM->getType();
1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); 1995 Variable *OneMask = makeVectorOfOnes(Src0Ty);
1979 Variable *T = makeReg(Dest->getType()); 1996 Variable *T = makeReg(Dest->getType());
1980 _movp(T, Src0RM); 1997 _movp(T, Src0RM);
1981 _pand(T, OneMask); 1998 _pand(T, OneMask);
1982 _movp(Dest, T); 1999 _movp(Dest, T);
1983 } else { 2000 } else {
1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2001 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1985 if (Src0->getType() == IceType_i64) 2002 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
1986 Src0 = loOperand(Src0); 2003 Src0 = loOperand(Src0);
1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2004 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
1988 // t1 = trunc Src0RM; Dest = t1 2005 // t1 = trunc Src0RM; Dest = t1
1989 Variable *T = nullptr; 2006 Variable *T = nullptr;
1990 _mov(T, Src0RM); 2007 _mov(T, Src0RM);
1991 if (Dest->getType() == IceType_i1) 2008 if (Dest->getType() == IceType_i1)
1992 _and(T, Ctx->getConstantInt1(1)); 2009 _and(T, Ctx->getConstantInt1(1));
1993 _mov(Dest, T); 2010 _mov(Dest, T);
1994 } 2011 }
1995 break; 2012 break;
(...skipping 10 matching lines...) Expand all
2006 case InstCast::Fptosi: 2023 case InstCast::Fptosi:
2007 if (isVectorType(Dest->getType())) { 2024 if (isVectorType(Dest->getType())) {
2008 assert(Dest->getType() == IceType_v4i32 && 2025 assert(Dest->getType() == IceType_v4i32 &&
2009 Inst->getSrc(0)->getType() == IceType_v4f32); 2026 Inst->getSrc(0)->getType() == IceType_v4f32);
2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2027 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2028 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2012 Src0RM = legalizeToReg(Src0RM); 2029 Src0RM = legalizeToReg(Src0RM);
2013 Variable *T = makeReg(Dest->getType()); 2030 Variable *T = makeReg(Dest->getType());
2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2031 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2015 _movp(Dest, T); 2032 _movp(Dest, T);
2016 } else if (Dest->getType() == IceType_i64) { 2033 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2017 // Use a helper for converting floating-point values to 64-bit 2034 // Use a helper for converting floating-point values to 64-bit
2018 // integers. SSE2 appears to have no way to convert from xmm 2035 // integers. SSE2 appears to have no way to convert from xmm
2019 // registers to something like the edx:eax register pair, and 2036 // registers to something like the edx:eax register pair, and
2020 // gcc and clang both want to use x87 instructions complete with 2037 // gcc and clang both want to use x87 instructions complete with
2021 // temporary manipulation of the status word. This helper is 2038 // temporary manipulation of the status word. This helper is
2022 // not needed for x86-64. 2039 // not needed for x86-64.
2023 split64(Dest); 2040 split64(Dest);
2024 const SizeT MaxSrcs = 1; 2041 const SizeT MaxSrcs = 1;
2025 Type SrcType = Inst->getSrc(0)->getType(); 2042 Type SrcType = Inst->getSrc(0)->getType();
2026 InstCall *Call = 2043 InstCall *Call =
2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2044 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2028 : H_fptosi_f64_i64, 2045 : H_fptosi_f64_i64,
2029 Dest, MaxSrcs); 2046 Dest, MaxSrcs);
2030 Call->addArg(Inst->getSrc(0)); 2047 Call->addArg(Inst->getSrc(0));
2031 lowerCall(Call); 2048 lowerCall(Call);
2032 } else { 2049 } else {
2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2050 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2051 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2035 Variable *T_1 = makeReg(IceType_i32); 2052 Variable *T_1 = nullptr;
2053 if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
2054 T_1 = makeReg(IceType_i64);
2055 } else {
2056 assert(Dest->getType() != IceType_i64);
2057 T_1 = makeReg(IceType_i32);
2058 }
2059 // cvt() requires its integer argument to be a GPR.
2060 T_1->setWeightInfinite();
2036 Variable *T_2 = makeReg(Dest->getType()); 2061 Variable *T_2 = makeReg(Dest->getType());
2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2062 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2063 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2039 if (Dest->getType() == IceType_i1) 2064 if (Dest->getType() == IceType_i1)
2040 _and(T_2, Ctx->getConstantInt1(1)); 2065 _and(T_2, Ctx->getConstantInt1(1));
2041 _mov(Dest, T_2); 2066 _mov(Dest, T_2);
2042 } 2067 }
2043 break; 2068 break;
2044 case InstCast::Fptoui: 2069 case InstCast::Fptoui:
2045 if (isVectorType(Dest->getType())) { 2070 if (isVectorType(Dest->getType())) {
2046 assert(Dest->getType() == IceType_v4i32 && 2071 assert(Dest->getType() == IceType_v4i32 &&
2047 Inst->getSrc(0)->getType() == IceType_v4f32); 2072 Inst->getSrc(0)->getType() == IceType_v4f32);
2048 const SizeT MaxSrcs = 1; 2073 const SizeT MaxSrcs = 1;
2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2074 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2050 Call->addArg(Inst->getSrc(0)); 2075 Call->addArg(Inst->getSrc(0));
2051 lowerCall(Call); 2076 lowerCall(Call);
2052 } else if (Dest->getType() == IceType_i64 || 2077 } else if (Dest->getType() == IceType_i64 ||
2053 Dest->getType() == IceType_i32) { 2078 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2054 // Use a helper for both x86-32 and x86-64. 2079 // Use a helper for both x86-32 and x86-64.
2055 split64(Dest); 2080 if (!Traits::Is64Bit)
2081 split64(Dest);
2056 const SizeT MaxSrcs = 1; 2082 const SizeT MaxSrcs = 1;
2057 Type DestType = Dest->getType(); 2083 Type DestType = Dest->getType();
2058 Type SrcType = Inst->getSrc(0)->getType(); 2084 Type SrcType = Inst->getSrc(0)->getType();
2059 IceString TargetString; 2085 IceString TargetString;
2060 if (isInt32Asserting32Or64(DestType)) { 2086 if (Traits::Is64Bit) {
2087 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2088 : H_fptoui_f64_i64;
2089 } else if (isInt32Asserting32Or64(DestType)) {
2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2090 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2062 : H_fptoui_f64_i32; 2091 : H_fptoui_f64_i32;
2063 } else { 2092 } else {
2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2093 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2065 : H_fptoui_f64_i64; 2094 : H_fptoui_f64_i64;
2066 } 2095 }
2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2096 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2068 Call->addArg(Inst->getSrc(0)); 2097 Call->addArg(Inst->getSrc(0));
2069 lowerCall(Call); 2098 lowerCall(Call);
2070 return; 2099 return;
2071 } else { 2100 } else {
2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2102 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2074 Variable *T_1 = makeReg(IceType_i32); 2103 assert(Dest->getType() != IceType_i64);
2104 Variable *T_1 = nullptr;
2105 if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
2106 T_1 = makeReg(IceType_i64);
2107 } else {
2108 assert(Dest->getType() != IceType_i32);
2109 T_1 = makeReg(IceType_i32);
2110 }
2111 T_1->setWeightInfinite();
2075 Variable *T_2 = makeReg(Dest->getType()); 2112 Variable *T_2 = makeReg(Dest->getType());
2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2113 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2114 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2078 if (Dest->getType() == IceType_i1) 2115 if (Dest->getType() == IceType_i1)
2079 _and(T_2, Ctx->getConstantInt1(1)); 2116 _and(T_2, Ctx->getConstantInt1(1));
2080 _mov(Dest, T_2); 2117 _mov(Dest, T_2);
2081 } 2118 }
2082 break; 2119 break;
2083 case InstCast::Sitofp: 2120 case InstCast::Sitofp:
2084 if (isVectorType(Dest->getType())) { 2121 if (isVectorType(Dest->getType())) {
2085 assert(Dest->getType() == IceType_v4f32 && 2122 assert(Dest->getType() == IceType_v4f32 &&
2086 Inst->getSrc(0)->getType() == IceType_v4i32); 2123 Inst->getSrc(0)->getType() == IceType_v4i32);
2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2124 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2125 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2089 Src0RM = legalizeToReg(Src0RM); 2126 Src0RM = legalizeToReg(Src0RM);
2090 Variable *T = makeReg(Dest->getType()); 2127 Variable *T = makeReg(Dest->getType());
2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2128 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2092 _movp(Dest, T); 2129 _movp(Dest, T);
2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2130 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2094 // Use a helper for x86-32. 2131 // Use a helper for x86-32.
2095 const SizeT MaxSrcs = 1; 2132 const SizeT MaxSrcs = 1;
2096 Type DestType = Dest->getType(); 2133 Type DestType = Dest->getType();
2097 InstCall *Call = 2134 InstCall *Call =
2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2135 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2099 : H_sitofp_i64_f64, 2136 : H_sitofp_i64_f64,
2100 Dest, MaxSrcs); 2137 Dest, MaxSrcs);
2101 // TODO: Call the correct compiler-rt helper function. 2138 // TODO: Call the correct compiler-rt helper function.
2102 Call->addArg(Inst->getSrc(0)); 2139 Call->addArg(Inst->getSrc(0));
2103 lowerCall(Call); 2140 lowerCall(Call);
2104 return; 2141 return;
2105 } else { 2142 } else {
2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2143 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2107 // Sign-extend the operand. 2144 // Sign-extend the operand.
2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2145 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2109 Variable *T_1 = makeReg(IceType_i32); 2146 Variable *T_1 = nullptr;
2147 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2148 T_1 = makeReg(IceType_i64);
2149 } else {
2150 assert(Src0RM->getType() != IceType_i64);
2151 T_1 = makeReg(IceType_i32);
2152 }
2153 T_1->setWeightInfinite();
2110 Variable *T_2 = makeReg(Dest->getType()); 2154 Variable *T_2 = makeReg(Dest->getType());
2111 if (Src0RM->getType() == IceType_i32) 2155 if (Src0RM->getType() == T_1->getType())
2112 _mov(T_1, Src0RM); 2156 _mov(T_1, Src0RM);
2113 else 2157 else
2114 _movsx(T_1, Src0RM); 2158 _movsx(T_1, Src0RM);
2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2159 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2116 _mov(Dest, T_2); 2160 _mov(Dest, T_2);
2117 } 2161 }
2118 break; 2162 break;
2119 case InstCast::Uitofp: { 2163 case InstCast::Uitofp: {
2120 Operand *Src0 = Inst->getSrc(0); 2164 Operand *Src0 = Inst->getSrc(0);
2121 if (isVectorType(Src0->getType())) { 2165 if (isVectorType(Src0->getType())) {
2122 assert(Dest->getType() == IceType_v4f32 && 2166 assert(Dest->getType() == IceType_v4f32 &&
2123 Src0->getType() == IceType_v4i32); 2167 Src0->getType() == IceType_v4i32);
2124 const SizeT MaxSrcs = 1; 2168 const SizeT MaxSrcs = 1;
2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2169 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2126 Call->addArg(Src0); 2170 Call->addArg(Src0);
2127 lowerCall(Call); 2171 lowerCall(Call);
2128 } else if (Src0->getType() == IceType_i64 || 2172 } else if (Src0->getType() == IceType_i64 ||
2129 Src0->getType() == IceType_i32) { 2173 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2130 // Use a helper for x86-32 and x86-64. Also use a helper for 2174 // Use a helper for x86-32 and x86-64. Also use a helper for
2131 // i32 on x86-32. 2175 // i32 on x86-32.
2132 const SizeT MaxSrcs = 1; 2176 const SizeT MaxSrcs = 1;
2133 Type DestType = Dest->getType(); 2177 Type DestType = Dest->getType();
2134 IceString TargetString; 2178 IceString TargetString;
2135 if (isInt32Asserting32Or64(Src0->getType())) { 2179 if (isInt32Asserting32Or64(Src0->getType())) {
2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2180 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2137 : H_uitofp_i32_f64; 2181 : H_uitofp_i32_f64;
2138 } else { 2182 } else {
2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2183 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2140 : H_uitofp_i64_f64; 2184 : H_uitofp_i64_f64;
2141 } 2185 }
2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2186 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2143 Call->addArg(Src0); 2187 Call->addArg(Src0);
2144 lowerCall(Call); 2188 lowerCall(Call);
2145 return; 2189 return;
2146 } else { 2190 } else {
2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2191 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2148 // Zero-extend the operand. 2192 // Zero-extend the operand.
2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2193 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2150 Variable *T_1 = makeReg(IceType_i32); 2194 Variable *T_1 = nullptr;
2195 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2196 T_1 = makeReg(IceType_i64);
2197 } else {
2198 assert(Src0RM->getType() != IceType_i64);
2199 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2200 T_1 = makeReg(IceType_i32);
2201 }
2202 T_1->setWeightInfinite();
2151 Variable *T_2 = makeReg(Dest->getType()); 2203 Variable *T_2 = makeReg(Dest->getType());
2152 if (Src0RM->getType() == IceType_i32) 2204 if (Src0RM->getType() == T_1->getType())
2153 _mov(T_1, Src0RM); 2205 _mov(T_1, Src0RM);
2154 else 2206 else
2155 _movzx(T_1, Src0RM); 2207 _movzx(T_1, Src0RM);
2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2208 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2157 _mov(Dest, T_2); 2209 _mov(Dest, T_2);
2158 } 2210 }
2159 break; 2211 break;
2160 } 2212 }
2161 case InstCast::Bitcast: { 2213 case InstCast::Bitcast: {
2162 Operand *Src0 = Inst->getSrc(0); 2214 Operand *Src0 = Inst->getSrc(0);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
2198 typename Traits::SpillVariable *SpillVar = 2250 typename Traits::SpillVariable *SpillVar =
2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2251 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2200 SpillVar->setLinkedTo(Dest); 2252 SpillVar->setLinkedTo(Dest);
2201 Variable *Spill = SpillVar; 2253 Variable *Spill = SpillVar;
2202 Spill->setWeight(RegWeight::Zero); 2254 Spill->setWeight(RegWeight::Zero);
2203 _mov(T, Src0RM); 2255 _mov(T, Src0RM);
2204 _mov(Spill, T); 2256 _mov(Spill, T);
2205 _mov(Dest, Spill); 2257 _mov(Dest, Spill);
2206 } break; 2258 } break;
2207 case IceType_i64: { 2259 case IceType_i64: {
2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2260 assert(Src0->getType() == IceType_f64);
2209 assert(Src0RM->getType() == IceType_f64); 2261 if (Traits::Is64Bit) {
2210 // a.i64 = bitcast b.f64 ==> 2262 // Movd requires its fp argument (in this case, the bitcast source) to
2211 // s.f64 = spill b.f64 2263 // be an xmm register.
2212 // t_lo.i32 = lo(s.f64) 2264 Variable *Src0R = legalizeToReg(Src0);
2213 // a_lo.i32 = t_lo.i32 2265 Variable *T = makeReg(IceType_i64);
2214 // t_hi.i32 = hi(s.f64) 2266 _movd(T, Src0R);
2215 // a_hi.i32 = t_hi.i32 2267 _mov(Dest, T);
2216 Operand *SpillLo, *SpillHi; 2268 } else {
2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2269 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2270 // a.i64 = bitcast b.f64 ==>
2271 // s.f64 = spill b.f64
2272 // t_lo.i32 = lo(s.f64)
2273 // a_lo.i32 = t_lo.i32
2274 // t_hi.i32 = hi(s.f64)
2275 // a_hi.i32 = t_hi.i32
2276 Operand *SpillLo, *SpillHi;
2277 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2278 typename Traits::SpillVariable *SpillVar =
2279 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2280 SpillVar->setLinkedTo(Src0Var);
2281 Variable *Spill = SpillVar;
2282 Spill->setWeight(RegWeight::Zero);
2283 _movq(Spill, Src0RM);
2284 SpillLo = Traits::VariableSplit::create(Func, Spill,
2285 Traits::VariableSplit::Low);
2286 SpillHi = Traits::VariableSplit::create(Func, Spill,
2287 Traits::VariableSplit::High);
2288 } else {
2289 SpillLo = loOperand(Src0RM);
2290 SpillHi = hiOperand(Src0RM);
2291 }
2292
2293 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2294 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2295 Variable *T_Lo = makeReg(IceType_i32);
2296 Variable *T_Hi = makeReg(IceType_i32);
2297
2298 _mov(T_Lo, SpillLo);
2299 _mov(DestLo, T_Lo);
2300 _mov(T_Hi, SpillHi);
2301 _mov(DestHi, T_Hi);
2302 }
2303 } break;
2304 case IceType_f64: {
2305 assert(Src0->getType() == IceType_i64);
2306 if (Traits::Is64Bit) {
2307 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2308 Variable *T = makeReg(IceType_f64);
2309 // Movd requires its fp argument (in this case, the bitcast destination)
2310 // to be an xmm register.
2311 T->setWeightInfinite();
2312 _movd(T, Src0RM);
2313 _mov(Dest, T);
2314 } else {
2315 Src0 = legalize(Src0);
2316 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2317 Variable *T = Func->makeVariable(Dest->getType());
2318 _movq(T, Src0);
2319 _movq(Dest, T);
2320 break;
2321 }
2322 // a.f64 = bitcast b.i64 ==>
2323 // t_lo.i32 = b_lo.i32
2324 // FakeDef(s.f64)
2325 // lo(s.f64) = t_lo.i32
2326 // t_hi.i32 = b_hi.i32
2327 // hi(s.f64) = t_hi.i32
2328 // a.f64 = s.f64
2218 typename Traits::SpillVariable *SpillVar = 2329 typename Traits::SpillVariable *SpillVar =
2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2330 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2220 SpillVar->setLinkedTo(Src0Var); 2331 SpillVar->setLinkedTo(Dest);
2221 Variable *Spill = SpillVar; 2332 Variable *Spill = SpillVar;
2222 Spill->setWeight(RegWeight::Zero); 2333 Spill->setWeight(RegWeight::Zero);
2223 _movq(Spill, Src0RM); 2334
2224 SpillLo = Traits::VariableSplit::create(Func, Spill, 2335 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2225 Traits::VariableSplit::Low); 2336 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2226 SpillHi = Traits::VariableSplit::create(Func, Spill, 2337 Func, Spill, Traits::VariableSplit::Low);
2227 Traits::VariableSplit::High); 2338 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2228 } else { 2339 Func, Spill, Traits::VariableSplit::High);
2229 SpillLo = loOperand(Src0RM); 2340 _mov(T_Lo, loOperand(Src0));
2230 SpillHi = hiOperand(Src0RM); 2341 // Technically, the Spill is defined after the _store happens, but
2342 // SpillLo is considered a "use" of Spill so define Spill before it
2343 // is used.
2344 Context.insert(InstFakeDef::create(Func, Spill));
2345 _store(T_Lo, SpillLo);
2346 _mov(T_Hi, hiOperand(Src0));
2347 _store(T_Hi, SpillHi);
2348 _movq(Dest, Spill);
2231 } 2349 }
2232
2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2235 Variable *T_Lo = makeReg(IceType_i32);
2236 Variable *T_Hi = makeReg(IceType_i32);
2237
2238 _mov(T_Lo, SpillLo);
2239 _mov(DestLo, T_Lo);
2240 _mov(T_Hi, SpillHi);
2241 _mov(DestHi, T_Hi);
2242 } break;
2243 case IceType_f64: {
2244 Src0 = legalize(Src0);
2245 assert(Src0->getType() == IceType_i64);
2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2247 Variable *T = Func->makeVariable(Dest->getType());
2248 _movq(T, Src0);
2249 _movq(Dest, T);
2250 break;
2251 }
2252 // a.f64 = bitcast b.i64 ==>
2253 // t_lo.i32 = b_lo.i32
2254 // FakeDef(s.f64)
2255 // lo(s.f64) = t_lo.i32
2256 // t_hi.i32 = b_hi.i32
2257 // hi(s.f64) = t_hi.i32
2258 // a.f64 = s.f64
2259 typename Traits::SpillVariable *SpillVar =
2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2261 SpillVar->setLinkedTo(Dest);
2262 Variable *Spill = SpillVar;
2263 Spill->setWeight(RegWeight::Zero);
2264
2265 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2267 Func, Spill, Traits::VariableSplit::Low);
2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2269 Func, Spill, Traits::VariableSplit::High);
2270 _mov(T_Lo, loOperand(Src0));
2271 // Technically, the Spill is defined after the _store happens, but
2272 // SpillLo is considered a "use" of Spill so define Spill before it
2273 // is used.
2274 Context.insert(InstFakeDef::create(Func, Spill));
2275 _store(T_Lo, SpillLo);
2276 _mov(T_Hi, hiOperand(Src0));
2277 _store(T_Hi, SpillHi);
2278 _movq(Dest, Spill);
2279 } break; 2350 } break;
2280 case IceType_v8i1: { 2351 case IceType_v8i1: {
2281 assert(Src0->getType() == IceType_i8); 2352 assert(Src0->getType() == IceType_i8);
2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); 2353 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); 2354 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2284 // Arguments to functions are required to be at least 32 bits wide. 2355 // Arguments to functions are required to be at least 32 bits wide.
2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); 2356 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2286 Call->addArg(Src0AsI32); 2357 Call->addArg(Src0AsI32);
2287 lowerCall(Call); 2358 lowerCall(Call);
2288 } break; 2359 } break;
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after
2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2679 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2609 _pxor(T, MinusOne); 2680 _pxor(T, MinusOne);
2610 } break; 2681 } break;
2611 } 2682 }
2612 2683
2613 _movp(Dest, T); 2684 _movp(Dest, T);
2614 eliminateNextVectorSextInstruction(Dest); 2685 eliminateNextVectorSextInstruction(Dest);
2615 return; 2686 return;
2616 } 2687 }
2617 2688
2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2689 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2619 if (Src0->getType() == IceType_i64) { 2690 lowerIcmp64(Inst);
2620 InstIcmp::ICond Condition = Inst->getCondition();
2621 size_t Index = static_cast<size_t>(Condition);
2622 assert(Index < Traits::TableIcmp64Size);
2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2627 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2628 Constant *One = Ctx->getConstantInt32(1);
2629 typename Traits::Insts::Label *LabelFalse =
2630 Traits::Insts::Label::create(Func, this);
2631 typename Traits::Insts::Label *LabelTrue =
2632 Traits::Insts::Label::create(Func, this);
2633 _mov(Dest, One);
2634 _cmp(Src0HiRM, Src1HiRI);
2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2639 _cmp(Src0LoRM, Src1LoRI);
2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2641 Context.insert(LabelFalse);
2642 _mov_nonkillable(Dest, Zero);
2643 Context.insert(LabelTrue);
2644 return; 2691 return;
2645 } 2692 }
2646 2693
2647 // cmp b, c 2694 // cmp b, c
2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2695 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2649 _cmp(Src0RM, Src1); 2696 _cmp(Src0RM, Src1);
2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); 2697 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
2651 } 2698 }
2652 2699
2700 template <typename Machine>
2701 template <typename T>
2702 typename std::enable_if<!T::Is64Bit, void>::type
2703 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
2704 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2705 Operand *Src0 = legalize(Inst->getSrc(0));
2706 Operand *Src1 = legalize(Inst->getSrc(1));
2707 Variable *Dest = Inst->getDest();
2708 InstIcmp::ICond Condition = Inst->getCondition();
2709 size_t Index = static_cast<size_t>(Condition);
2710 assert(Index < Traits::TableIcmp64Size);
2711 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2712 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2713 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2714 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2715 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2716 Constant *One = Ctx->getConstantInt32(1);
2717 typename Traits::Insts::Label *LabelFalse =
2718 Traits::Insts::Label::create(Func, this);
2719 typename Traits::Insts::Label *LabelTrue =
2720 Traits::Insts::Label::create(Func, this);
2721 _mov(Dest, One);
2722 _cmp(Src0HiRM, Src1HiRI);
2723 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2724 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2725 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2726 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2727 _cmp(Src0LoRM, Src1LoRI);
2728 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2729 Context.insert(LabelFalse);
2730 _mov_nonkillable(Dest, Zero);
2731 Context.insert(LabelTrue);
2732 }
2733
2653 template <class Machine> 2734 template <class Machine>
2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 2735 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2736 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2737 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2657 ConstantInteger32 *ElementIndex = 2738 ConstantInteger32 *ElementIndex =
2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 2739 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2659 // Only constant indices are allowed in PNaCl IR. 2740 // Only constant indices are allowed in PNaCl IR.
2660 assert(ElementIndex); 2741 assert(ElementIndex);
2661 unsigned Index = ElementIndex->getValue(); 2742 unsigned Index = ElementIndex->getValue();
2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2743 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
2841 } 2922 }
2842 case Intrinsics::AtomicLoad: { 2923 case Intrinsics::AtomicLoad: {
2843 // We require the memory address to be naturally aligned. 2924 // We require the memory address to be naturally aligned.
2844 // Given that is the case, then normal loads are atomic. 2925 // Given that is the case, then normal loads are atomic.
2845 if (!Intrinsics::isMemoryOrderValid( 2926 if (!Intrinsics::isMemoryOrderValid(
2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 2927 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2847 Func->setError("Unexpected memory ordering for AtomicLoad"); 2928 Func->setError("Unexpected memory ordering for AtomicLoad");
2848 return; 2929 return;
2849 } 2930 }
2850 Variable *Dest = Instr->getDest(); 2931 Variable *Dest = Instr->getDest();
2851 if (Dest->getType() == IceType_i64) { 2932 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2852 // Follow what GCC does and use a movq instead of what lowerLoad() 2933 // Follow what GCC does and use a movq instead of what lowerLoad()
2853 // normally does (split the load into two). 2934 // normally does (split the load into two).
2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2935 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2855 // can't happen anyway, since this is x86-32 and integer arithmetic only 2936 // can't happen anyway, since this is x86-32 and integer arithmetic only
2856 // happens on 32-bit quantities. 2937 // happens on 32-bit quantities.
2857 Variable *T = makeReg(IceType_f64); 2938 Variable *T = makeReg(IceType_f64);
2858 typename Traits::X86OperandMem *Addr = 2939 typename Traits::X86OperandMem *Addr =
2859 formMemoryOperand(Instr->getArg(0), IceType_f64); 2940 formMemoryOperand(Instr->getArg(0), IceType_f64);
2860 _movq(T, Addr); 2941 _movq(T, Addr);
2861 // Then cast the bits back out of the XMM register to the i64 Dest. 2942 // Then cast the bits back out of the XMM register to the i64 Dest.
(...skipping 29 matching lines...) Expand all
2891 if (!Intrinsics::isMemoryOrderValid( 2972 if (!Intrinsics::isMemoryOrderValid(
2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 2973 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2893 Func->setError("Unexpected memory ordering for AtomicStore"); 2974 Func->setError("Unexpected memory ordering for AtomicStore");
2894 return; 2975 return;
2895 } 2976 }
2896 // We require the memory address to be naturally aligned. 2977 // We require the memory address to be naturally aligned.
2897 // Given that is the case, then normal stores are atomic. 2978 // Given that is the case, then normal stores are atomic.
2898 // Add a fence after the store to make it visible. 2979 // Add a fence after the store to make it visible.
2899 Operand *Value = Instr->getArg(0); 2980 Operand *Value = Instr->getArg(0);
2900 Operand *Ptr = Instr->getArg(1); 2981 Operand *Ptr = Instr->getArg(1);
2901 if (Value->getType() == IceType_i64) { 2982 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
2902 // Use a movq instead of what lowerStore() normally does 2983 // Use a movq instead of what lowerStore() normally does
2903 // (split the store into two), following what GCC does. 2984 // (split the store into two), following what GCC does.
2904 // Cast the bits from int -> to an xmm register first. 2985 // Cast the bits from int -> to an xmm register first.
2905 Variable *T = makeReg(IceType_f64); 2986 Variable *T = makeReg(IceType_f64);
2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2907 lowerCast(Cast); 2988 lowerCast(Cast);
2908 // Then store XMM w/ a movq. 2989 // Then store XMM w/ a movq.
2909 typename Traits::X86OperandMem *Addr = 2990 typename Traits::X86OperandMem *Addr =
2910 formMemoryOperand(Ptr, IceType_f64); 2991 formMemoryOperand(Ptr, IceType_f64);
2911 _storeq(T, Addr); 2992 _storeq(T, Addr);
2912 _mfence(); 2993 _mfence();
2913 return; 2994 return;
2914 } 2995 }
2915 InstStore *Store = InstStore::create(Func, Value, Ptr); 2996 InstStore *Store = InstStore::create(Func, Value, Ptr);
2916 lowerStore(Store); 2997 lowerStore(Store);
2917 _mfence(); 2998 _mfence();
2918 return; 2999 return;
2919 } 3000 }
2920 case Intrinsics::Bswap: { 3001 case Intrinsics::Bswap: {
2921 Variable *Dest = Instr->getDest(); 3002 Variable *Dest = Instr->getDest();
2922 Operand *Val = Instr->getArg(0); 3003 Operand *Val = Instr->getArg(0);
2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the 3004 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2924 // argument must be a register. Use rotate left for 16-bit bswap. 3005 // argument must be a register. Use rotate left for 16-bit bswap.
2925 if (Val->getType() == IceType_i64) { 3006 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2926 Val = legalizeUndef(Val); 3007 Val = legalizeUndef(Val);
2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); 3008 Variable *T_Lo = legalizeToReg(loOperand(Val));
2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); 3009 Variable *T_Hi = legalizeToReg(hiOperand(Val));
2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2931 _bswap(T_Lo); 3012 _bswap(T_Lo);
2932 _bswap(T_Hi); 3013 _bswap(T_Hi);
2933 _mov(DestLo, T_Hi); 3014 _mov(DestLo, T_Hi);
2934 _mov(DestHi, T_Lo); 3015 _mov(DestHi, T_Lo);
2935 } else if (Val->getType() == IceType_i32) { 3016 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
3017 Val->getType() == IceType_i32) {
2936 Variable *T = legalizeToReg(Val); 3018 Variable *T = legalizeToReg(Val);
2937 _bswap(T); 3019 _bswap(T);
2938 _mov(Dest, T); 3020 _mov(Dest, T);
2939 } else { 3021 } else {
2940 assert(Val->getType() == IceType_i16); 3022 assert(Val->getType() == IceType_i16);
2941 Constant *Eight = Ctx->getConstantInt16(8); 3023 Constant *Eight = Ctx->getConstantInt16(8);
2942 Variable *T = nullptr; 3024 Variable *T = nullptr;
2943 Val = legalize(Val); 3025 Val = legalize(Val);
2944 _mov(T, Val); 3026 _mov(T, Val);
2945 _rol(T, Eight); 3027 _rol(T, Eight);
2946 _mov(Dest, T); 3028 _mov(Dest, T);
2947 } 3029 }
2948 return; 3030 return;
2949 } 3031 }
2950 case Intrinsics::Ctpop: { 3032 case Intrinsics::Ctpop: {
2951 Variable *Dest = Instr->getDest(); 3033 Variable *Dest = Instr->getDest();
3034 Variable *T = nullptr;
2952 Operand *Val = Instr->getArg(0); 3035 Operand *Val = Instr->getArg(0);
2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3036 Type ValTy = Val->getType();
2954 ? H_call_ctpop_i32 3037 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
2955 : H_call_ctpop_i64, 3038
2956 Dest, 1); 3039 if (!Traits::Is64Bit) {
3040 T = Dest;
3041 } else {
3042 T = makeReg(IceType_i64);
3043 if (ValTy == IceType_i32) {
3044 // In x86-64, __popcountsi2 is not defined, so we cheat a bit by
3045 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
3046 // ensure we will not have any bits set on Val's upper 32 bits.
3047 Variable *V = makeReg(IceType_i64);
3048 _movzx(V, Val);
3049 Val = V;
3050 }
3051 ValTy = IceType_i64;
3052 }
3053
3054 InstCall *Call = makeHelperCall(
3055 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
2957 Call->addArg(Val); 3056 Call->addArg(Val);
2958 lowerCall(Call); 3057 lowerCall(Call);
2959 // The popcount helpers always return 32-bit values, while the intrinsic's 3058 // The popcount helpers always return 32-bit values, while the intrinsic's
2960 // signature matches the native POPCNT instruction and fills a 64-bit reg 3059 // signature matches the native POPCNT instruction and fills a 64-bit reg
2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case 3060 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2962 // the user doesn't do that in the IR. If the user does that in the IR, 3061 // the user doesn't do that in the IR. If the user does that in the IR,
2963 // then this zero'ing instruction is dead and gets optimized out. 3062 // then this zero'ing instruction is dead and gets optimized out.
2964 if (Val->getType() == IceType_i64) { 3063 if (!Traits::Is64Bit) {
2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3064 assert(T == Dest);
2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3065 if (Val->getType() == IceType_i64) {
2967 _mov(DestHi, Zero); 3066 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3067 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3068 _mov(DestHi, Zero);
3069 }
3070 } else {
3071 assert(Val->getType() == IceType_i64);
3072 // T is 64 bit. It needs to be copied to dest. We need to:
3073 //
3074 // T_1.32 = trunc T.64 to i32
3075 // T_2.64 = zext T_1.32 to i64
3076 // Dest.<<right_size>> = T_2.<<right_size>>
3077 //
3078 // which ensures the upper 32 bits will always be cleared. Just doing a
3079 //
3080 // mov Dest.32 = trunc T.32 to i32
3081 //
3082 // is dangerous because there's a chance the compiler will optimize this
3083 // copy out. To use _movzx we need two new registers (one 32-, and
3084 // another 64-bit wide.)
3085 Variable *T_1 = makeReg(IceType_i32);
3086 _mov(T_1, T);
3087 Variable *T_2 = makeReg(IceType_i64);
3088 _movzx(T_2, T_1);
3089 _mov(Dest, T_2);
2968 } 3090 }
2969 return; 3091 return;
2970 } 3092 }
2971 case Intrinsics::Ctlz: { 3093 case Intrinsics::Ctlz: {
2972 // The "is zero undef" parameter is ignored and we always return 3094 // The "is zero undef" parameter is ignored and we always return
2973 // a well-defined value. 3095 // a well-defined value.
2974 Operand *Val = legalize(Instr->getArg(0)); 3096 Operand *Val = legalize(Instr->getArg(0));
2975 Operand *FirstVal; 3097 Operand *FirstVal;
2976 Operand *SecondVal = nullptr; 3098 Operand *SecondVal = nullptr;
2977 if (Val->getType() == IceType_i64) { 3099 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2978 FirstVal = loOperand(Val); 3100 FirstVal = loOperand(Val);
2979 SecondVal = hiOperand(Val); 3101 SecondVal = hiOperand(Val);
2980 } else { 3102 } else {
2981 FirstVal = Val; 3103 FirstVal = Val;
2982 } 3104 }
2983 const bool IsCttz = false; 3105 const bool IsCttz = false;
2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3106 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2985 SecondVal); 3107 SecondVal);
2986 return; 3108 return;
2987 } 3109 }
2988 case Intrinsics::Cttz: { 3110 case Intrinsics::Cttz: {
2989 // The "is zero undef" parameter is ignored and we always return 3111 // The "is zero undef" parameter is ignored and we always return
2990 // a well-defined value. 3112 // a well-defined value.
2991 Operand *Val = legalize(Instr->getArg(0)); 3113 Operand *Val = legalize(Instr->getArg(0));
2992 Operand *FirstVal; 3114 Operand *FirstVal;
2993 Operand *SecondVal = nullptr; 3115 Operand *SecondVal = nullptr;
2994 if (Val->getType() == IceType_i64) { 3116 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2995 FirstVal = hiOperand(Val); 3117 FirstVal = hiOperand(Val);
2996 SecondVal = loOperand(Val); 3118 SecondVal = loOperand(Val);
2997 } else { 3119 } else {
2998 FirstVal = Val; 3120 FirstVal = Val;
2999 } 3121 }
3000 const bool IsCttz = true; 3122 const bool IsCttz = true;
3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3123 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3002 SecondVal); 3124 SecondVal);
3003 return; 3125 return;
3004 } 3126 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3098 Func->setError("Should not be lowering UnknownIntrinsic"); 3220 Func->setError("Should not be lowering UnknownIntrinsic");
3099 return; 3221 return;
3100 } 3222 }
3101 return; 3223 return;
3102 } 3224 }
3103 3225
3104 template <class Machine> 3226 template <class Machine>
3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3227 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3106 Operand *Ptr, Operand *Expected, 3228 Operand *Ptr, Operand *Expected,
3107 Operand *Desired) { 3229 Operand *Desired) {
3108 if (Expected->getType() == IceType_i64) { 3230 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
3109 // Reserve the pre-colored registers first, before adding any more 3231 // Reserve the pre-colored registers first, before adding any more
3110 // infinite-weight variables from formMemoryOperand's legalization. 3232 // infinite-weight variables from formMemoryOperand's legalization.
3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3233 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3234 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3235 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3236 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3115 _mov(T_eax, loOperand(Expected)); 3237 _mov(T_eax, loOperand(Expected));
3116 _mov(T_edx, hiOperand(Expected)); 3238 _mov(T_edx, hiOperand(Expected));
3117 _mov(T_ebx, loOperand(Desired)); 3239 _mov(T_ebx, loOperand(Desired));
3118 _mov(T_ecx, hiOperand(Desired)); 3240 _mov(T_ecx, hiOperand(Desired));
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3338 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3217 Operand *Ptr, Operand *Val) { 3339 Operand *Ptr, Operand *Val) {
3218 bool NeedsCmpxchg = false; 3340 bool NeedsCmpxchg = false;
3219 LowerBinOp Op_Lo = nullptr; 3341 LowerBinOp Op_Lo = nullptr;
3220 LowerBinOp Op_Hi = nullptr; 3342 LowerBinOp Op_Hi = nullptr;
3221 switch (Operation) { 3343 switch (Operation) {
3222 default: 3344 default:
3223 Func->setError("Unknown AtomicRMW operation"); 3345 Func->setError("Unknown AtomicRMW operation");
3224 return; 3346 return;
3225 case Intrinsics::AtomicAdd: { 3347 case Intrinsics::AtomicAdd: {
3226 if (Dest->getType() == IceType_i64) { 3348 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3227 // All the fall-through paths must set this to true, but use this 3349 // All the fall-through paths must set this to true, but use this
3228 // for asserting. 3350 // for asserting.
3229 NeedsCmpxchg = true; 3351 NeedsCmpxchg = true;
3230 Op_Lo = &TargetX86Base<Machine>::_add; 3352 Op_Lo = &TargetX86Base<Machine>::_add;
3231 Op_Hi = &TargetX86Base<Machine>::_adc; 3353 Op_Hi = &TargetX86Base<Machine>::_adc;
3232 break; 3354 break;
3233 } 3355 }
3234 typename Traits::X86OperandMem *Addr = 3356 typename Traits::X86OperandMem *Addr =
3235 formMemoryOperand(Ptr, Dest->getType()); 3357 formMemoryOperand(Ptr, Dest->getType());
3236 const bool Locked = true; 3358 const bool Locked = true;
3237 Variable *T = nullptr; 3359 Variable *T = nullptr;
3238 _mov(T, Val); 3360 _mov(T, Val);
3239 _xadd(Addr, T, Locked); 3361 _xadd(Addr, T, Locked);
3240 _mov(Dest, T); 3362 _mov(Dest, T);
3241 return; 3363 return;
3242 } 3364 }
3243 case Intrinsics::AtomicSub: { 3365 case Intrinsics::AtomicSub: {
3244 if (Dest->getType() == IceType_i64) { 3366 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3245 NeedsCmpxchg = true; 3367 NeedsCmpxchg = true;
3246 Op_Lo = &TargetX86Base<Machine>::_sub; 3368 Op_Lo = &TargetX86Base<Machine>::_sub;
3247 Op_Hi = &TargetX86Base<Machine>::_sbb; 3369 Op_Hi = &TargetX86Base<Machine>::_sbb;
3248 break; 3370 break;
3249 } 3371 }
3250 typename Traits::X86OperandMem *Addr = 3372 typename Traits::X86OperandMem *Addr =
3251 formMemoryOperand(Ptr, Dest->getType()); 3373 formMemoryOperand(Ptr, Dest->getType());
3252 const bool Locked = true; 3374 const bool Locked = true;
3253 Variable *T = nullptr; 3375 Variable *T = nullptr;
3254 _mov(T, Val); 3376 _mov(T, Val);
(...skipping 16 matching lines...) Expand all
3271 NeedsCmpxchg = true; 3393 NeedsCmpxchg = true;
3272 Op_Lo = &TargetX86Base<Machine>::_and; 3394 Op_Lo = &TargetX86Base<Machine>::_and;
3273 Op_Hi = &TargetX86Base<Machine>::_and; 3395 Op_Hi = &TargetX86Base<Machine>::_and;
3274 break; 3396 break;
3275 case Intrinsics::AtomicXor: 3397 case Intrinsics::AtomicXor:
3276 NeedsCmpxchg = true; 3398 NeedsCmpxchg = true;
3277 Op_Lo = &TargetX86Base<Machine>::_xor; 3399 Op_Lo = &TargetX86Base<Machine>::_xor;
3278 Op_Hi = &TargetX86Base<Machine>::_xor; 3400 Op_Hi = &TargetX86Base<Machine>::_xor;
3279 break; 3401 break;
3280 case Intrinsics::AtomicExchange: 3402 case Intrinsics::AtomicExchange:
3281 if (Dest->getType() == IceType_i64) { 3403 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3282 NeedsCmpxchg = true; 3404 NeedsCmpxchg = true;
3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3405 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3284 // just need to be moved to the ecx and ebx registers. 3406 // just need to be moved to the ecx and ebx registers.
3285 Op_Lo = nullptr; 3407 Op_Lo = nullptr;
3286 Op_Hi = nullptr; 3408 Op_Hi = nullptr;
3287 break; 3409 break;
3288 } 3410 }
3289 typename Traits::X86OperandMem *Addr = 3411 typename Traits::X86OperandMem *Addr =
3290 formMemoryOperand(Ptr, Dest->getType()); 3412 formMemoryOperand(Ptr, Dest->getType());
3291 Variable *T = nullptr; 3413 Variable *T = nullptr;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
3325 // .LABEL: 3447 // .LABEL:
3326 // mov <reg>, eax 3448 // mov <reg>, eax
3327 // op <reg>, [desired_adj] 3449 // op <reg>, [desired_adj]
3328 // lock cmpxchg [ptr], <reg> 3450 // lock cmpxchg [ptr], <reg>
3329 // jne .LABEL 3451 // jne .LABEL
3330 // mov <dest>, eax 3452 // mov <dest>, eax
3331 // 3453 //
3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3454 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3333 Val = legalize(Val); 3455 Val = legalize(Val);
3334 Type Ty = Val->getType(); 3456 Type Ty = Val->getType();
3335 if (Ty == IceType_i64) { 3457 if (!Traits::Is64Bit && Ty == IceType_i64) {
3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3458 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3459 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3460 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3339 _mov(T_eax, loOperand(Addr)); 3461 _mov(T_eax, loOperand(Addr));
3340 _mov(T_edx, hiOperand(Addr)); 3462 _mov(T_edx, hiOperand(Addr));
3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3463 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3464 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3343 typename Traits::Insts::Label *Label = 3465 typename Traits::Insts::Label *Label =
3344 Traits::Insts::Label::create(Func, this); 3466 Traits::Insts::Label::create(Func, this);
3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3467 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
3457 if (Cttz) { 3579 if (Cttz) {
3458 _mov(T_Dest, ThirtyTwo); 3580 _mov(T_Dest, ThirtyTwo);
3459 } else { 3581 } else {
3460 Constant *SixtyThree = Ctx->getConstantInt32(63); 3582 Constant *SixtyThree = Ctx->getConstantInt32(63);
3461 _mov(T_Dest, SixtyThree); 3583 _mov(T_Dest, SixtyThree);
3462 } 3584 }
3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3585 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3464 if (!Cttz) { 3586 if (!Cttz) {
3465 _xor(T_Dest, ThirtyOne); 3587 _xor(T_Dest, ThirtyOne);
3466 } 3588 }
3467 if (Ty == IceType_i32) { 3589 if (Traits::Is64Bit || Ty == IceType_i32) {
3468 _mov(Dest, T_Dest); 3590 _mov(Dest, T_Dest);
3469 return; 3591 return;
3470 } 3592 }
3471 _add(T_Dest, ThirtyTwo); 3593 _add(T_Dest, ThirtyTwo);
3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3594 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3595 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3474 // Will be using "test" on this, so we need a registerized variable. 3596 // Will be using "test" on this, so we need a registerized variable.
3475 Variable *SecondVar = legalizeToReg(SecondVal); 3597 Variable *SecondVar = legalizeToReg(SecondVal);
3476 Variable *T_Dest2 = makeReg(IceType_i32); 3598 Variable *T_Dest2 = makeReg(IceType_i32);
3477 if (Cttz) { 3599 if (Cttz) {
(...skipping 406 matching lines...) Expand 10 before | Expand all | Expand 10 after
3884 // Index is Index=Var-Const ==> 4006 // Index is Index=Var-Const ==>
3885 // set Index=Var, Offset-=(Const<<Shift) 4007 // set Index=Var, Offset-=(Const<<Shift)
3886 4008
3887 // TODO: consider overflow issues with respect to Offset. 4009 // TODO: consider overflow issues with respect to Offset.
3888 // TODO: handle symbolic constants. 4010 // TODO: handle symbolic constants.
3889 } 4011 }
3890 } 4012 }
3891 4013
3892 template <class Machine> 4014 template <class Machine>
3893 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4015 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
3894 // A Load instruction can be treated the same as an Assign instruction, after 4016 // A Load instruction can be treated the same as an Assign instruction,
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
4017 // after
3895 // the source operand is transformed into a Traits::X86OperandMem operand. 4018 // the source operand is transformed into a Traits::X86OperandMem operand.
3896 // Note that the address mode optimization already creates a 4019 // Note that the address mode optimization already creates a
3897 // Traits::X86OperandMem operand, so it doesn't need another level of 4020 // Traits::X86OperandMem operand, so it doesn't need another level of
3898 // transformation. 4021 // transformation.
3899 Variable *DestLoad = Load->getDest(); 4022 Variable *DestLoad = Load->getDest();
3900 Type Ty = DestLoad->getType(); 4023 Type Ty = DestLoad->getType();
3901 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4024 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3902 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4025 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3903 lowerAssign(Assign); 4026 lowerAssign(Assign);
3904 } 4027 }
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4046 return; 4169 return;
4047 } 4170 }
4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4171 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4049 // But if SrcT is immediate, we might be able to do better, as 4172 // But if SrcT is immediate, we might be able to do better, as
4050 // the cmov instruction doesn't allow an immediate operand: 4173 // the cmov instruction doesn't allow an immediate operand:
4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4174 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4175 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4053 std::swap(SrcT, SrcF); 4176 std::swap(SrcT, SrcF);
4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4177 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4055 } 4178 }
4056 if (DestTy == IceType_i64) { 4179 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4057 SrcT = legalizeUndef(SrcT); 4180 SrcT = legalizeUndef(SrcT);
4058 SrcF = legalizeUndef(SrcF); 4181 SrcF = legalizeUndef(SrcF);
4059 // Set the low portion. 4182 // Set the low portion.
4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4183 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4061 Variable *TLo = nullptr; 4184 Variable *TLo = nullptr;
4062 Operand *SrcFLo = legalize(loOperand(SrcF)); 4185 Operand *SrcFLo = legalize(loOperand(SrcF));
4063 _mov(TLo, SrcFLo); 4186 _mov(TLo, SrcFLo);
4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4187 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4065 _cmov(TLo, SrcTLo, Cond); 4188 _cmov(TLo, SrcTLo, Cond);
4066 _mov(DestLo, TLo); 4189 _mov(DestLo, TLo);
4067 // Set the high portion. 4190 // Set the high portion.
4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4191 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4069 Variable *THi = nullptr; 4192 Variable *THi = nullptr;
4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); 4193 Operand *SrcFHi = legalize(hiOperand(SrcF));
4071 _mov(THi, SrcFHi); 4194 _mov(THi, SrcFHi);
4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); 4195 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4073 _cmov(THi, SrcTHi, Cond); 4196 _cmov(THi, SrcTHi, Cond);
4074 _mov(DestHi, THi); 4197 _mov(DestHi, THi);
4075 return; 4198 return;
4076 } 4199 }
4077 4200
4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4201 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4202 (Traits::Is64Bit && DestTy == IceType_i64));
4079 Variable *T = nullptr; 4203 Variable *T = nullptr;
4080 SrcF = legalize(SrcF); 4204 SrcF = legalize(SrcF);
4081 _mov(T, SrcF); 4205 _mov(T, SrcF);
4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4206 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4083 _cmov(T, SrcT, Cond); 4207 _cmov(T, SrcT, Cond);
4084 _mov(Dest, T); 4208 _mov(Dest, T);
4085 } 4209 }
4086 4210
4087 template <class Machine> 4211 template <class Machine>
4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4212 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4089 Operand *Value = Inst->getData(); 4213 Operand *Value = Inst->getData();
4090 Operand *Addr = Inst->getAddr(); 4214 Operand *Addr = Inst->getAddr();
4091 typename Traits::X86OperandMem *NewAddr = 4215 typename Traits::X86OperandMem *NewAddr =
4092 formMemoryOperand(Addr, Value->getType()); 4216 formMemoryOperand(Addr, Value->getType());
4093 Type Ty = NewAddr->getType(); 4217 Type Ty = NewAddr->getType();
4094 4218
4095 if (Ty == IceType_i64) { 4219 if (!Traits::Is64Bit && Ty == IceType_i64) {
4096 Value = legalizeUndef(Value); 4220 Value = legalizeUndef(Value);
4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4221 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4222 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4099 _store(ValueHi, 4223 _store(ValueHi,
4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); 4224 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4101 _store(ValueLo, 4225 _store(ValueLo,
4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); 4226 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4103 } else if (isVectorType(Ty)) { 4227 } else if (isVectorType(Ty)) {
4104 _storep(legalizeToReg(Value), NewAddr); 4228 _storep(legalizeToReg(Value), NewAddr);
4105 } else { 4229 } else {
(...skipping 27 matching lines...) Expand all
4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4257 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4134 Context.insert(NewStore); 4258 Context.insert(NewStore);
4135 } 4259 }
4136 } 4260 }
4137 4261
4138 template <class Machine> 4262 template <class Machine>
4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, 4263 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4140 uint64_t Min, uint64_t Max) { 4264 uint64_t Min, uint64_t Max) {
4141 // TODO(ascull): 64-bit should not reach here but only because it is not 4265 // TODO(ascull): 64-bit should not reach here but only because it is not
4142 // implemented yet. This should be able to handle the 64-bit case. 4266 // implemented yet. This should be able to handle the 64-bit case.
4143 assert(Comparison->getType() != IceType_i64); 4267 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
4144 // Subtracting 0 is a nop so don't do it 4268 // Subtracting 0 is a nop so don't do it
4145 if (Min != 0) { 4269 if (Min != 0) {
4146 // Avoid clobbering the comparison by copying it 4270 // Avoid clobbering the comparison by copying it
4147 Variable *T = nullptr; 4271 Variable *T = nullptr;
4148 _mov(T, Comparison); 4272 _mov(T, Comparison);
4149 _sub(T, Ctx->getConstantInt32(Min)); 4273 _sub(T, Ctx->getConstantInt32(Min));
4150 Comparison = T; 4274 Comparison = T;
4151 } 4275 }
4152 4276
4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); 4277 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
4232 4356
4233 template <class Machine> 4357 template <class Machine>
4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4358 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4235 // Group cases together and navigate through them with a binary search 4359 // Group cases together and navigate through them with a binary search
4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); 4360 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
4237 Operand *Src0 = Inst->getComparison(); 4361 Operand *Src0 = Inst->getComparison();
4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); 4362 CfgNode *DefaultTarget = Inst->getLabelDefault();
4239 4363
4240 assert(CaseClusters.size() != 0); // Should always be at least one 4364 assert(CaseClusters.size() != 0); // Should always be at least one
4241 4365
4242 if (Src0->getType() == IceType_i64) { 4366 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
4243 Src0 = legalize(Src0); // get Base/Index into physical registers 4367 Src0 = legalize(Src0); // get Base/Index into physical registers
4244 Operand *Src0Lo = loOperand(Src0); 4368 Operand *Src0Lo = loOperand(Src0);
4245 Operand *Src0Hi = hiOperand(Src0); 4369 Operand *Src0Hi = hiOperand(Src0);
4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { 4370 if (CaseClusters.back().getHigh() > UINT32_MAX) {
4247 // TODO(ascull): handle 64-bit case properly (currently naive version) 4371 // TODO(ascull): handle 64-bit case properly (currently naive version)
4248 // This might be handled by a higher level lowering of switches. 4372 // This might be handled by a higher level lowering of switches.
4249 SizeT NumCases = Inst->getNumCases(); 4373 SizeT NumCases = Inst->getNumCases();
4250 if (NumCases >= 2) { 4374 if (NumCases >= 2) {
4251 Src0Lo = legalizeToReg(Src0Lo); 4375 Src0Lo = legalizeToReg(Src0Lo);
4252 Src0Hi = legalizeToReg(Src0Hi); 4376 Src0Hi = legalizeToReg(Src0Hi);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4437 // that follows. This means that the original Store instruction is 4561 // that follows. This means that the original Store instruction is
4438 // still there, either because the value being stored is used beyond 4562 // still there, either because the value being stored is used beyond
4439 // the Store instruction, or because dead code elimination did not 4563 // the Store instruction, or because dead code elimination did not
4440 // happen. In either case, we cancel RMW lowering (and the caller 4564 // happen. In either case, we cancel RMW lowering (and the caller
4441 // deletes the RMW instruction). 4565 // deletes the RMW instruction).
4442 if (!RMW->isLastUse(RMW->getBeacon())) 4566 if (!RMW->isLastUse(RMW->getBeacon()))
4443 return; 4567 return;
4444 Operand *Src = RMW->getData(); 4568 Operand *Src = RMW->getData();
4445 Type Ty = Src->getType(); 4569 Type Ty = Src->getType();
4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4570 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4447 if (Ty == IceType_i64) { 4571 if (!Traits::Is64Bit && Ty == IceType_i64) {
4448 Src = legalizeUndef(Src); 4572 Src = legalizeUndef(Src);
4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4573 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4574 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4451 typename Traits::X86OperandMem *AddrLo = 4575 typename Traits::X86OperandMem *AddrLo =
4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); 4576 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4453 typename Traits::X86OperandMem *AddrHi = 4577 typename Traits::X86OperandMem *AddrHi =
4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); 4578 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4455 switch (RMW->getOp()) { 4579 switch (RMW->getOp()) {
4456 default: 4580 default:
4457 // TODO(stichnot): Implement other arithmetic operators. 4581 // TODO(stichnot): Implement other arithmetic operators.
(...skipping 13 matching lines...) Expand all
4471 case InstArithmetic::Or: 4595 case InstArithmetic::Or:
4472 _or_rmw(AddrLo, SrcLo); 4596 _or_rmw(AddrLo, SrcLo);
4473 _or_rmw(AddrHi, SrcHi); 4597 _or_rmw(AddrHi, SrcHi);
4474 return; 4598 return;
4475 case InstArithmetic::Xor: 4599 case InstArithmetic::Xor:
4476 _xor_rmw(AddrLo, SrcLo); 4600 _xor_rmw(AddrLo, SrcLo);
4477 _xor_rmw(AddrHi, SrcHi); 4601 _xor_rmw(AddrHi, SrcHi);
4478 return; 4602 return;
4479 } 4603 }
4480 } else { 4604 } else {
4481 // i8, i16, i32 4605 // x86-32: i8, i16, i32
4606 // x86-64: i8, i16, i32, i64
4482 switch (RMW->getOp()) { 4607 switch (RMW->getOp()) {
4483 default: 4608 default:
4484 // TODO(stichnot): Implement other arithmetic operators. 4609 // TODO(stichnot): Implement other arithmetic operators.
4485 break; 4610 break;
4486 case InstArithmetic::Add: 4611 case InstArithmetic::Add:
4487 Src = legalize(Src, Legal_Reg | Legal_Imm); 4612 Src = legalize(Src, Legal_Reg | Legal_Imm);
4488 _add_rmw(Addr, Src); 4613 _add_rmw(Addr, Src);
4489 return; 4614 return;
4490 case InstArithmetic::Sub: 4615 case InstArithmetic::Sub:
4491 Src = legalize(Src, Legal_Reg | Legal_Imm); 4616 Src = legalize(Src, Legal_Reg | Legal_Imm);
(...skipping 24 matching lines...) Expand all
4516 } else { 4641 } else {
4517 TargetLowering::lowerOther(Instr); 4642 TargetLowering::lowerOther(Instr);
4518 } 4643 }
4519 } 4644 }
4520 4645
4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4646 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4522 /// preserve integrity of liveness analysis. Undef values are also 4647 /// preserve integrity of liveness analysis. Undef values are also
4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4648 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4524 /// Undef input. 4649 /// Undef input.
4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4650 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4526 // Pause constant blinding or pooling, blinding or pooling will be done later 4651 if (Traits::Is64Bit) {
4652 // On x86-64 we don't need to prelower phis -- the architecture can handle
4653 // 64-bit integer natively.
4654 return;
4655 }
4656
4657 // Pause constant blinding or pooling, blinding or pooling will be done
4658 // later
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
4527 // during phi lowering assignments 4659 // during phi lowering assignments
4528 BoolFlagSaver B(RandomizationPoolingPaused, true); 4660 BoolFlagSaver B(RandomizationPoolingPaused, true);
4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4661 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4530 this, Context.getNode(), Func); 4662 this, Context.getNode(), Func);
4531 } 4663 }
4532 4664
4533 // There is no support for loading or emitting vector constants, so the 4665 // There is no support for loading or emitting vector constants, so the
4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4666 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4535 // etc. are initialized with register operations. 4667 // etc. are initialized with register operations.
4536 // 4668 //
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { 4810 if (auto *Const = llvm::dyn_cast<Constant>(From)) {
4679 if (llvm::isa<ConstantUndef>(Const)) { 4811 if (llvm::isa<ConstantUndef>(Const)) {
4680 From = legalizeUndef(Const, RegNum); 4812 From = legalizeUndef(Const, RegNum);
4681 if (isVectorType(Ty)) 4813 if (isVectorType(Ty))
4682 return From; 4814 return From;
4683 Const = llvm::cast<Constant>(From); 4815 Const = llvm::cast<Constant>(From);
4684 } 4816 }
4685 // There should be no constants of vector type (other than undef). 4817 // There should be no constants of vector type (other than undef).
4686 assert(!isVectorType(Ty)); 4818 assert(!isVectorType(Ty));
4687 4819
4820 // If the operand is a 64-bit constant integer, we need to legalize it to a
4821 // register in x86-64.
4822 if (Traits::Is64Bit) {
4823 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Const)) {
4824 Variable *V = copyToReg(C, RegNum);
4825 V->setWeightInfinite();
4826 return V;
4827 }
4828 }
4829
4688 // If the operand is a 32-bit constant integer, we should check 4830 // If the operand is a 32-bit constant integer, we should check
4689 // whether we need to randomize it or pool it. 4831 // whether we need to randomize it or pool it.
4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { 4832 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); 4833 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
4692 if (NewConst != Const) { 4834 if (NewConst != Const) {
4693 return NewConst; 4835 return NewConst;
4694 } 4836 }
4695 } 4837 }
4696 4838
4697 // Convert a scalar floating point constant into an explicit 4839 // Convert a scalar floating point constant into an explicit
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
4815 } 4957 }
4816 // Do legalization, which contains randomization/pooling 4958 // Do legalization, which contains randomization/pooling
4817 // or do randomization/pooling. 4959 // or do randomization/pooling.
4818 return llvm::cast<typename Traits::X86OperandMem>( 4960 return llvm::cast<typename Traits::X86OperandMem>(
4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 4961 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
4820 } 4962 }
4821 4963
4822 template <class Machine> 4964 template <class Machine>
4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 4965 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
4824 // There aren't any 64-bit integer registers for x86-32. 4966 // There aren't any 64-bit integer registers for x86-32.
4825 assert(Type != IceType_i64); 4967 assert(Traits::Is64Bit || Type != IceType_i64);
4826 Variable *Reg = Func->makeVariable(Type); 4968 Variable *Reg = Func->makeVariable(Type);
4827 if (RegNum == Variable::NoRegister) 4969 if (RegNum == Variable::NoRegister)
4828 Reg->setWeightInfinite(); 4970 Reg->setWeightInfinite();
4829 else 4971 else
4830 Reg->setRegNum(RegNum); 4972 Reg->setRegNum(RegNum);
4831 return Reg; 4973 return Reg;
4832 } 4974 }
4833 4975
4834 template <class Machine> void TargetX86Base<Machine>::postLower() { 4976 template <class Machine> void TargetX86Base<Machine>::postLower() {
4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) 4977 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 11 matching lines...) Expand all
4847 4989
4848 template <class Machine> 4990 template <class Machine>
4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 4991 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
4850 if (!BuildDefs::dump()) 4992 if (!BuildDefs::dump())
4851 return; 4993 return;
4852 Ostream &Str = Ctx->getStrEmit(); 4994 Ostream &Str = Ctx->getStrEmit();
4853 Str << getConstantPrefix() << C->getValue(); 4995 Str << getConstantPrefix() << C->getValue();
4854 } 4996 }
4855 4997
4856 template <class Machine> 4998 template <class Machine>
4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { 4999 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 5000 if (!Traits::Is64Bit) {
5001 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
5002 } else {
5003 if (!BuildDefs::dump())
5004 return;
5005 Ostream &Str = Ctx->getStrEmit();
5006 Str << getConstantPrefix() << C->getValue();
5007 }
4859 } 5008 }
4860 5009
4861 template <class Machine> 5010 template <class Machine>
4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { 5011 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
4863 if (!BuildDefs::dump()) 5012 if (!BuildDefs::dump())
4864 return; 5013 return;
4865 Ostream &Str = Ctx->getStrEmit(); 5014 Ostream &Str = Ctx->getStrEmit();
4866 C->emitPoolLabel(Str); 5015 C->emitPoolLabel(Str);
4867 } 5016 }
4868 5017
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4993 Constant *Mask1 = Ctx->getConstantInt( 5142 Constant *Mask1 = Ctx->getConstantInt(
4994 MemOperand->getOffset()->getType(), Cookie + Value); 5143 MemOperand->getOffset()->getType(), Cookie + Value);
4995 Constant *Mask2 = 5144 Constant *Mask2 =
4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5145 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
4997 5146
4998 typename Traits::X86OperandMem *TempMemOperand = 5147 typename Traits::X86OperandMem *TempMemOperand =
4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), 5148 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5000 MemOperand->getBase(), Mask1); 5149 MemOperand->getBase(), Mask1);
5001 // If we have already assigned a physical register, we must come from 5150 // If we have already assigned a physical register, we must come from
5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5151 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
5003 // the assigned register as this assignment is the start of its use-def 5152 // the assigned register as this assignment is the start of its
5153 // use-def
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
5004 // chain. So we add RegNum argument here. 5154 // chain. So we add RegNum argument here.
5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5155 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5006 _lea(RegTemp, TempMemOperand); 5156 _lea(RegTemp, TempMemOperand);
5007 // As source operand doesn't use the dstreg, we don't need to add 5157 // As source operand doesn't use the dstreg, we don't need to add
5008 // _set_dest_nonkillable(). 5158 // _set_dest_nonkillable().
5009 // But if we use the same Dest Reg, that is, with RegNum 5159 // But if we use the same Dest Reg, that is, with RegNum
5010 // assigned, we should add this _set_dest_nonkillable() 5160 // assigned, we should add this _set_dest_nonkillable()
5011 if (RegNum != Variable::NoRegister) 5161 if (RegNum != Variable::NoRegister)
5012 _set_dest_nonkillable(); 5162 _set_dest_nonkillable();
5013 5163
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
5077 } 5227 }
5078 // the offset is not eligible for blinding or pooling, return the original 5228 // the offset is not eligible for blinding or pooling, return the original
5079 // mem operand 5229 // mem operand
5080 return MemOperand; 5230 return MemOperand;
5081 } 5231 }
5082 5232
5083 } // end of namespace X86Internal 5233 } // end of namespace X86Internal
5084 } // end of namespace Ice 5234 } // end of namespace Ice
5085 5235
5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5236 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698