Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(609)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1273153002: Subzero. Native 64-bit int arithmetic on x86-64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes tests & make format Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | unittest/AssemblerX8632/DataMov.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
70 // NumUses counts the number of times Var is used as a source operand in the 70 // NumUses counts the number of times Var is used as a source operand in the
71 // basic block. If IsComplex is true and there is more than one use of Var, 71 // basic block. If IsComplex is true and there is more than one use of Var,
72 // then the folding optimization is disabled for Var. 72 // then the folding optimization is disabled for Var.
73 uint32_t NumUses = 0; 73 uint32_t NumUses = 0;
74 }; 74 };
75 75
76 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
77 public: 77 public:
78 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
79 PK_None, 79 PK_None,
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
80 PK_Icmp32, 81 PK_Icmp32,
81 PK_Icmp64, 82 PK_Icmp64,
82 PK_Fcmp, 83 PK_Fcmp,
83 PK_Trunc 84 PK_Trunc
84 }; 85 };
85 86
86 /// Currently the actual enum values are not used (other than CK_None), but we 87 /// Currently the actual enum values are not used (other than CK_None), but we
87 /// go ahead and produce them anyway for symmetry with the 88 /// go ahead and produce them anyway for symmetry with the
88 /// BoolFoldingProducerKind. 89 /// BoolFoldingProducerKind.
89 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
(...skipping 23 matching lines...) Expand all
113 }; 114 };
114 115
115 template <class MachineTraits> 116 template <class MachineTraits>
116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
118 119
119 template <class MachineTraits> 120 template <class MachineTraits>
120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
122 if (llvm::isa<InstIcmp>(Instr)) { 123 if (llvm::isa<InstIcmp>(Instr)) {
123 if (Instr->getSrc(0)->getType() != IceType_i64) 124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
124 return PK_Icmp32; 125 return PK_Icmp32;
125 return PK_None; // TODO(stichnot): actually PK_Icmp64; 126 return PK_None; // TODO(stichnot): actually PK_Icmp64;
126 } 127 }
127 return PK_None; // TODO(stichnot): remove this 128 return PK_None; // TODO(stichnot): remove this
128 129
129 if (llvm::isa<InstFcmp>(Instr)) 130 if (llvm::isa<InstFcmp>(Instr))
130 return PK_Fcmp; 131 return PK_Fcmp;
131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
132 switch (Cast->getCastKind()) { 133 switch (Cast->getCastKind()) {
133 default: 134 default:
(...skipping 502 matching lines...) Expand 10 before | Expand all | Expand 10 after
636 // instruction or equivalent. 637 // instruction or equivalent.
637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 638 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
638 // An InstLoad always qualifies. 639 // An InstLoad always qualifies.
639 LoadDest = Load->getDest(); 640 LoadDest = Load->getDest();
640 const bool DoLegalize = false; 641 const bool DoLegalize = false;
641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 642 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
642 LoadDest->getType(), DoLegalize); 643 LoadDest->getType(), DoLegalize);
643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 644 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
644 // An AtomicLoad intrinsic qualifies as long as it has a valid 645 // An AtomicLoad intrinsic qualifies as long as it has a valid
645 // memory ordering, and can be implemented in a single 646 // memory ordering, and can be implemented in a single
646 // instruction (i.e., not i64). 647 // instruction (i.e., not i64 on x86-32).
647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 648 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
648 if (ID == Intrinsics::AtomicLoad && 649 if (ID == Intrinsics::AtomicLoad &&
649 Intrin->getDest()->getType() != IceType_i64 && 650 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
650 Intrinsics::isMemoryOrderValid( 651 Intrinsics::isMemoryOrderValid(
651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 652 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
652 LoadDest = Intrin->getDest(); 653 LoadDest = Intrin->getDest();
653 const bool DoLegalize = false; 654 const bool DoLegalize = false;
654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 655 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
655 DoLegalize); 656 DoLegalize);
656 } 657 }
657 } 658 }
658 // A Load instruction can be folded into the following 659 // A Load instruction can be folded into the following
659 // instruction only if the following instruction ends the Load's 660 // instruction only if the following instruction ends the Load's
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
717 template <class Machine> 718 template <class Machine>
718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 719 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 720 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
720 return Br->optimizeBranch(NextNode); 721 return Br->optimizeBranch(NextNode);
721 } 722 }
722 return false; 723 return false;
723 } 724 }
724 725
725 template <class Machine> 726 template <class Machine>
726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 727 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
728 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
729 if (RegNum == Traits::RegisterSet::Reg_esp ||
730 RegNum == Traits::RegisterSet::Reg_ebp)
731 Ty = Traits::WordType;
727 if (Ty == IceType_void) 732 if (Ty == IceType_void)
728 Ty = IceType_i32; 733 Ty = IceType_i32;
729 if (PhysicalRegisters[Ty].empty()) 734 if (PhysicalRegisters[Ty].empty())
730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 735 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
731 assert(RegNum < PhysicalRegisters[Ty].size()); 736 assert(RegNum < PhysicalRegisters[Ty].size());
732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 737 Variable *Reg = PhysicalRegisters[Ty][RegNum];
733 if (Reg == nullptr) { 738 if (Reg == nullptr) {
734 Reg = Func->makeVariable(Ty); 739 Reg = Func->makeVariable(Ty);
735 Reg->setRegNum(RegNum); 740 Reg->setRegNum(RegNum);
736 PhysicalRegisters[Ty][RegNum] = Reg; 741 PhysicalRegisters[Ty][RegNum] = Reg;
(...skipping 26 matching lines...) Expand all
763 } 768 }
764 int32_t Offset = Var->getStackOffset(); 769 int32_t Offset = Var->getStackOffset();
765 int32_t BaseRegNum = Var->getBaseRegNum(); 770 int32_t BaseRegNum = Var->getBaseRegNum();
766 if (BaseRegNum == Variable::NoRegister) { 771 if (BaseRegNum == Variable::NoRegister) {
767 BaseRegNum = getFrameOrStackReg(); 772 BaseRegNum = getFrameOrStackReg();
768 if (!hasFramePointer()) 773 if (!hasFramePointer())
769 Offset += getStackAdjustment(); 774 Offset += getStackAdjustment();
770 } 775 }
771 if (Offset) 776 if (Offset)
772 Str << Offset; 777 Str << Offset;
773 const Type FrameSPTy = IceType_i32; 778 const Type FrameSPTy = Traits::WordType;
774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 779 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
775 } 780 }
776 781
777 template <class Machine> 782 template <class Machine>
778 typename TargetX86Base<Machine>::Traits::Address 783 typename TargetX86Base<Machine>::Traits::Address
779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 784 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
780 if (Var->hasReg()) 785 if (Var->hasReg())
781 llvm_unreachable("Stack Variable has a register assigned"); 786 llvm_unreachable("Stack Variable has a register assigned");
782 if (Var->getWeight().isInf()) { 787 if (Var->getWeight().isInf()) {
783 llvm_unreachable("Infinite-weight Variable has no register assigned"); 788 llvm_unreachable("Infinite-weight Variable has no register assigned");
(...skipping 19 matching lines...) Expand all
803 /// function generates an instruction to copy Arg into its assigned 808 /// function generates an instruction to copy Arg into its assigned
804 /// register if applicable. 809 /// register if applicable.
805 template <class Machine> 810 template <class Machine>
806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 811 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
807 Variable *FramePtr, 812 Variable *FramePtr,
808 size_t BasicFrameOffset, 813 size_t BasicFrameOffset,
809 size_t &InArgsSizeBytes) { 814 size_t &InArgsSizeBytes) {
810 Variable *Lo = Arg->getLo(); 815 Variable *Lo = Arg->getLo();
811 Variable *Hi = Arg->getHi(); 816 Variable *Hi = Arg->getHi();
812 Type Ty = Arg->getType(); 817 Type Ty = Arg->getType();
813 if (Lo && Hi && Ty == IceType_i64) { 818 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
814 // TODO(jpp): This special case is not needed for x86-64.
815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 819 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 820 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 821 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 822 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
819 return; 823 return;
820 } 824 }
821 if (isVectorType(Ty)) { 825 if (isVectorType(Ty)) {
822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 826 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
823 } 827 }
824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 828 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 829 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
826 if (Arg->hasReg()) { 830 if (Arg->hasReg()) {
827 assert(Ty != IceType_i64); 831 assert(Ty != IceType_i64 || Traits::Is64Bit);
828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 832 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 833 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
830 if (isVectorType(Arg->getType())) { 834 if (isVectorType(Arg->getType())) {
831 _movp(Arg, Mem); 835 _movp(Arg, Mem);
832 } else { 836 } else {
833 _mov(Arg, Mem); 837 _mov(Arg, Mem);
834 } 838 }
835 // This argument-copying instruction uses an explicit Traits::X86OperandMem 839 // This argument-copying instruction uses an explicit Traits::X86OperandMem
836 // operand instead of a Variable, so its fill-from-stack operation has to be 840 // operand instead of a Variable, so its fill-from-stack operation has to be
837 // tracked separately for statistics. 841 // tracked separately for statistics.
838 Ctx->statsUpdateFills(); 842 Ctx->statsUpdateFills();
839 } 843 }
840 } 844 }
841 845
842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 846 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
843 // TODO(jpp): this is wrong for x86-64. 847 return Traits::WordType;
844 return IceType_i32;
845 } 848 }
846 849
847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { 850 template <class Machine>
851 template <typename T>
852 typename std::enable_if<!T::Is64Bit, void>::type
853 TargetX86Base<Machine>::split64(Variable *Var) {
848 switch (Var->getType()) { 854 switch (Var->getType()) {
849 default: 855 default:
850 return; 856 return;
851 case IceType_i64: 857 case IceType_i64:
852 // TODO: Only consider F64 if we need to push each half when 858 // TODO: Only consider F64 if we need to push each half when
853 // passing as an argument to a function call. Note that each half 859 // passing as an argument to a function call. Note that each half
854 // is still typed as I32. 860 // is still typed as I32.
855 case IceType_f64: 861 case IceType_f64:
856 break; 862 break;
857 } 863 }
(...skipping 11 matching lines...) Expand all
869 Hi->setName(Func, Var->getName(Func) + "__hi"); 875 Hi->setName(Func, Var->getName(Func) + "__hi");
870 } 876 }
871 Var->setLoHi(Lo, Hi); 877 Var->setLoHi(Lo, Hi);
872 if (Var->getIsArg()) { 878 if (Var->getIsArg()) {
873 Lo->setIsArg(); 879 Lo->setIsArg();
874 Hi->setIsArg(); 880 Hi->setIsArg();
875 } 881 }
876 } 882 }
877 883
878 template <class Machine> 884 template <class Machine>
879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { 885 template <typename T>
886 typename std::enable_if<!T::Is64Bit, Operand>::type *
887 TargetX86Base<Machine>::loOperand(Operand *Operand) {
880 assert(Operand->getType() == IceType_i64 || 888 assert(Operand->getType() == IceType_i64 ||
881 Operand->getType() == IceType_f64); 889 Operand->getType() == IceType_f64);
882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 890 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
883 return Operand; 891 return Operand;
884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 892 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
885 split64(Var); 893 split64(Var);
886 return Var->getLo(); 894 return Var->getLo();
887 } 895 }
888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 896 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 897 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 898 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
891 // Check if we need to blind/pool the constant. 899 // Check if we need to blind/pool the constant.
892 return legalize(ConstInt); 900 return legalize(ConstInt);
893 } 901 }
894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 902 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
895 auto *MemOperand = Traits::X86OperandMem::create( 903 auto *MemOperand = Traits::X86OperandMem::create(
896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 904 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
897 Mem->getShift(), Mem->getSegmentRegister()); 905 Mem->getShift(), Mem->getSegmentRegister());
898 // Test if we should randomize or pool the offset, if so randomize it or 906 // Test if we should randomize or pool the offset, if so randomize it or
899 // pool it then create mem operand with the blinded/pooled constant. 907 // pool it then create mem operand with the blinded/pooled constant.
900 // Otherwise, return the mem operand as ordinary mem operand. 908 // Otherwise, return the mem operand as ordinary mem operand.
901 return legalize(MemOperand); 909 return legalize(MemOperand);
902 } 910 }
903 llvm_unreachable("Unsupported operand type"); 911 llvm_unreachable("Unsupported operand type");
904 return nullptr; 912 return nullptr;
905 } 913 }
906 914
907 template <class Machine> 915 template <class Machine>
908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 916 template <typename T>
917 typename std::enable_if<!T::Is64Bit, Operand>::type *
918 TargetX86Base<Machine>::hiOperand(Operand *Operand) {
909 assert(Operand->getType() == IceType_i64 || 919 assert(Operand->getType() == IceType_i64 ||
910 Operand->getType() == IceType_f64); 920 Operand->getType() == IceType_f64);
911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 921 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
912 return Operand; 922 return Operand;
913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 923 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
914 split64(Var); 924 split64(Var);
915 return Var->getHi(); 925 return Var->getHi();
916 } 926 }
917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 927 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 928 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1110 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1101 Variable *Dest = Inst->getDest(); 1111 Variable *Dest = Inst->getDest();
1102 Operand *Src0 = legalize(Inst->getSrc(0)); 1112 Operand *Src0 = legalize(Inst->getSrc(0));
1103 Operand *Src1 = legalize(Inst->getSrc(1)); 1113 Operand *Src1 = legalize(Inst->getSrc(1));
1104 if (Inst->isCommutative()) { 1114 if (Inst->isCommutative()) {
1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1115 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1106 std::swap(Src0, Src1); 1116 std::swap(Src0, Src1);
1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1117 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1108 std::swap(Src0, Src1); 1118 std::swap(Src0, Src1);
1109 } 1119 }
1110 if (Dest->getType() == IceType_i64) { 1120 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1111 // These helper-call-involved instructions are lowered in this 1121 // These x86-32 helper-call-involved instructions are lowered in this
1112 // separate switch. This is because loOperand() and hiOperand() 1122 // separate switch. This is because loOperand() and hiOperand()
1113 // may insert redundant instructions for constant blinding and 1123 // may insert redundant instructions for constant blinding and
1114 // pooling. Such redundant instructions will fail liveness analysis 1124 // pooling. Such redundant instructions will fail liveness analysis
1115 // under -Om1 setting. And, actually these arguments do not need 1125 // under -Om1 setting. And, actually these arguments do not need
1116 // to be processed with loOperand() and hiOperand() to be used. 1126 // to be processed with loOperand() and hiOperand() to be used.
1117 switch (Inst->getOp()) { 1127 switch (Inst->getOp()) {
1118 case InstArithmetic::Udiv: { 1128 case InstArithmetic::Udiv: {
1119 const SizeT MaxSrcs = 2; 1129 const SizeT MaxSrcs = 2;
1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1130 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1121 Call->addArg(Inst->getSrc(0)); 1131 Call->addArg(Inst->getSrc(0));
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after
1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1659 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1650 // this ever becomes a problem we can introduce a pseudo rem instruction 1660 // this ever becomes a problem we can introduce a pseudo rem instruction
1651 // that returns the remainder in %al directly (and uses a mov for copying 1661 // that returns the remainder in %al directly (and uses a mov for copying
1652 // %ah to %al.) 1662 // %ah to %al.)
1653 static constexpr uint8_t AlSizeInBits = 8; 1663 static constexpr uint8_t AlSizeInBits = 8;
1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1664 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1655 _mov(Dest, T); 1665 _mov(Dest, T);
1656 Context.insert(InstFakeUse::create(Func, T_eax)); 1666 Context.insert(InstFakeUse::create(Func, T_eax));
1657 } else { 1667 } else {
1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1668 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1669 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1670 _mov(T_edx, Zero);
1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1671 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1661 _div(T_edx, Src1, T); 1672 _div(T_edx, Src1, T);
1662 _mov(Dest, T_edx); 1673 _mov(Dest, T_edx);
1663 } 1674 }
1664 break; 1675 break;
1665 case InstArithmetic::Srem: 1676 case InstArithmetic::Srem:
1666 // TODO(stichnot): Enable this after doing better performance 1677 // TODO(stichnot): Enable this after doing better performance
1667 // and cross testing. 1678 // and cross testing.
1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1679 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1669 // Optimize mod by constant power of 2, but not for Om1 or O0, 1680 // Optimize mod by constant power of 2, but not for Om1 or O0,
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1725 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1726 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1716 // this ever becomes a problem we can introduce a pseudo rem instruction 1727 // this ever becomes a problem we can introduce a pseudo rem instruction
1717 // that returns the remainder in %al directly (and uses a mov for copying 1728 // that returns the remainder in %al directly (and uses a mov for copying
1718 // %ah to %al.) 1729 // %ah to %al.)
1719 static constexpr uint8_t AlSizeInBits = 8; 1730 static constexpr uint8_t AlSizeInBits = 8;
1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1731 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1721 _mov(Dest, T); 1732 _mov(Dest, T);
1722 Context.insert(InstFakeUse::create(Func, T_eax)); 1733 Context.insert(InstFakeUse::create(Func, T_eax));
1723 } else { 1734 } else {
1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1735 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1736 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1726 _cbwdq(T_edx, T); 1737 _cbwdq(T_edx, T);
1727 _idiv(T_edx, Src1, T); 1738 _idiv(T_edx, Src1, T);
1728 _mov(Dest, T_edx); 1739 _mov(Dest, T_edx);
1729 } 1740 }
1730 break; 1741 break;
1731 case InstArithmetic::Fadd: 1742 case InstArithmetic::Fadd:
1732 _mov(T, Src0); 1743 _mov(T, Src0);
1733 _addss(T, Src1); 1744 _addss(T, Src1);
1734 _mov(Dest, T); 1745 _mov(Dest, T);
(...skipping 23 matching lines...) Expand all
1758 return lowerCall(Call); 1769 return lowerCall(Call);
1759 } 1770 }
1760 } 1771 }
1761 } 1772 }
1762 1773
1763 template <class Machine> 1774 template <class Machine>
1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1775 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1765 Variable *Dest = Inst->getDest(); 1776 Variable *Dest = Inst->getDest();
1766 Operand *Src0 = Inst->getSrc(0); 1777 Operand *Src0 = Inst->getSrc(0);
1767 assert(Dest->getType() == Src0->getType()); 1778 assert(Dest->getType() == Src0->getType());
1768 if (Dest->getType() == IceType_i64) { 1779 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1769 Src0 = legalize(Src0); 1780 Src0 = legalize(Src0);
1770 Operand *Src0Lo = loOperand(Src0); 1781 Operand *Src0Lo = loOperand(Src0);
1771 Operand *Src0Hi = hiOperand(Src0); 1782 Operand *Src0Hi = hiOperand(Src0);
1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1783 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1784 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1785 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1775 _mov(T_Lo, Src0Lo); 1786 _mov(T_Lo, Src0Lo);
1776 _mov(DestLo, T_Lo); 1787 _mov(DestLo, T_Lo);
1777 _mov(T_Hi, Src0Hi); 1788 _mov(T_Hi, Src0Hi);
1778 _mov(DestHi, T_Hi); 1789 _mov(DestHi, T_Hi);
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
1863 SizeT ShiftAmount = 1874 SizeT ShiftAmount =
1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1875 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
1865 1; 1876 1;
1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1877 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1867 Variable *T = makeReg(DestTy); 1878 Variable *T = makeReg(DestTy);
1868 _movp(T, Src0RM); 1879 _movp(T, Src0RM);
1869 _psll(T, ShiftConstant); 1880 _psll(T, ShiftConstant);
1870 _psra(T, ShiftConstant); 1881 _psra(T, ShiftConstant);
1871 _movp(Dest, T); 1882 _movp(Dest, T);
1872 } 1883 }
1873 } else if (Dest->getType() == IceType_i64) { 1884 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1885 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1875 Constant *Shift = Ctx->getConstantInt32(31); 1886 Constant *Shift = Ctx->getConstantInt32(31);
1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1887 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1888 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1878 Variable *T_Lo = makeReg(DestLo->getType()); 1889 Variable *T_Lo = makeReg(DestLo->getType());
1879 if (Src0RM->getType() == IceType_i32) { 1890 if (Src0RM->getType() == IceType_i32) {
1880 _mov(T_Lo, Src0RM); 1891 _mov(T_Lo, Src0RM);
1881 } else if (Src0RM->getType() == IceType_i1) { 1892 } else if (Src0RM->getType() == IceType_i1) {
1882 _movzx(T_Lo, Src0RM); 1893 _movzx(T_Lo, Src0RM);
1883 _shl(T_Lo, Shift); 1894 _shl(T_Lo, Shift);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1923 case InstCast::Zext: { 1934 case InstCast::Zext: {
1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1935 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1925 if (isVectorType(Dest->getType())) { 1936 if (isVectorType(Dest->getType())) {
1926 // onemask = materialize(1,1,...); dest = onemask & src 1937 // onemask = materialize(1,1,...); dest = onemask & src
1927 Type DestTy = Dest->getType(); 1938 Type DestTy = Dest->getType();
1928 Variable *OneMask = makeVectorOfOnes(DestTy); 1939 Variable *OneMask = makeVectorOfOnes(DestTy);
1929 Variable *T = makeReg(DestTy); 1940 Variable *T = makeReg(DestTy);
1930 _movp(T, Src0RM); 1941 _movp(T, Src0RM);
1931 _pand(T, OneMask); 1942 _pand(T, OneMask);
1932 _movp(Dest, T); 1943 _movp(Dest, T);
1933 } else if (Dest->getType() == IceType_i64) { 1944 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1934 // t1=movzx src; dst.lo=t1; dst.hi=0 1945 // t1=movzx src; dst.lo=t1; dst.hi=0
1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1946 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1947 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1948 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1938 Variable *Tmp = makeReg(DestLo->getType()); 1949 Variable *Tmp = makeReg(DestLo->getType());
1939 if (Src0RM->getType() == IceType_i32) { 1950 if (Src0RM->getType() == IceType_i32) {
1940 _mov(Tmp, Src0RM); 1951 _mov(Tmp, Src0RM);
1941 } else { 1952 } else {
1942 _movzx(Tmp, Src0RM); 1953 _movzx(Tmp, Src0RM);
1943 } 1954 }
1944 if (Src0RM->getType() == IceType_i1) { 1955 if (Src0RM->getType() == IceType_i1) {
1945 Constant *One = Ctx->getConstantInt32(1); 1956 Constant *One = Ctx->getConstantInt32(1);
1946 _and(Tmp, One); 1957 _and(Tmp, One);
1947 } 1958 }
1948 _mov(DestLo, Tmp); 1959 _mov(DestLo, Tmp);
1949 _mov(DestHi, Zero); 1960 _mov(DestHi, Zero);
1950 } else if (Src0RM->getType() == IceType_i1) { 1961 } else if (Src0RM->getType() == IceType_i1) {
1951 // t = Src0RM; t &= 1; Dest = t 1962 // t = Src0RM; t &= 1; Dest = t
1952 Constant *One = Ctx->getConstantInt32(1); 1963 Constant *One = Ctx->getConstantInt32(1);
1953 Type DestTy = Dest->getType(); 1964 Type DestTy = Dest->getType();
1954 Variable *T; 1965 Variable *T = nullptr;
1955 if (DestTy == IceType_i8) { 1966 if (DestTy == IceType_i8) {
1956 T = makeReg(DestTy);
1957 _mov(T, Src0RM); 1967 _mov(T, Src0RM);
1958 } else { 1968 } else {
1969 assert(DestTy != IceType_i1);
1970 assert(Traits::Is64Bit || DestTy != IceType_i64);
1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 1971 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
1960 T = makeReg(IceType_i32); 1972 // In x86-64 we need to widen T to 64-bits to ensure that T -- if
1973 // written to the stack (i.e., in -Om1) will be fully zero-extended.
1974 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
1961 _movzx(T, Src0RM); 1975 _movzx(T, Src0RM);
1962 } 1976 }
1963 _and(T, One); 1977 _and(T, One);
1964 _mov(Dest, T); 1978 _mov(Dest, T);
1965 } else { 1979 } else {
1966 // t1 = movzx src; dst = t1 1980 // t1 = movzx src; dst = t1
1967 Variable *T = makeReg(Dest->getType()); 1981 Variable *T = makeReg(Dest->getType());
1968 _movzx(T, Src0RM); 1982 _movzx(T, Src0RM);
1969 _mov(Dest, T); 1983 _mov(Dest, T);
1970 } 1984 }
1971 break; 1985 break;
1972 } 1986 }
1973 case InstCast::Trunc: { 1987 case InstCast::Trunc: {
1974 if (isVectorType(Dest->getType())) { 1988 if (isVectorType(Dest->getType())) {
1975 // onemask = materialize(1,1,...); dst = src & onemask 1989 // onemask = materialize(1,1,...); dst = src & onemask
1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1990 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1977 Type Src0Ty = Src0RM->getType(); 1991 Type Src0Ty = Src0RM->getType();
1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); 1992 Variable *OneMask = makeVectorOfOnes(Src0Ty);
1979 Variable *T = makeReg(Dest->getType()); 1993 Variable *T = makeReg(Dest->getType());
1980 _movp(T, Src0RM); 1994 _movp(T, Src0RM);
1981 _pand(T, OneMask); 1995 _pand(T, OneMask);
1982 _movp(Dest, T); 1996 _movp(Dest, T);
1983 } else { 1997 } else {
1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1998 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1985 if (Src0->getType() == IceType_i64) 1999 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
1986 Src0 = loOperand(Src0); 2000 Src0 = loOperand(Src0);
1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2001 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
1988 // t1 = trunc Src0RM; Dest = t1 2002 // t1 = trunc Src0RM; Dest = t1
1989 Variable *T = nullptr; 2003 Variable *T = nullptr;
1990 _mov(T, Src0RM); 2004 _mov(T, Src0RM);
1991 if (Dest->getType() == IceType_i1) 2005 if (Dest->getType() == IceType_i1)
1992 _and(T, Ctx->getConstantInt1(1)); 2006 _and(T, Ctx->getConstantInt1(1));
1993 _mov(Dest, T); 2007 _mov(Dest, T);
1994 } 2008 }
1995 break; 2009 break;
(...skipping 10 matching lines...) Expand all
2006 case InstCast::Fptosi: 2020 case InstCast::Fptosi:
2007 if (isVectorType(Dest->getType())) { 2021 if (isVectorType(Dest->getType())) {
2008 assert(Dest->getType() == IceType_v4i32 && 2022 assert(Dest->getType() == IceType_v4i32 &&
2009 Inst->getSrc(0)->getType() == IceType_v4f32); 2023 Inst->getSrc(0)->getType() == IceType_v4f32);
2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2024 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2025 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2012 Src0RM = legalizeToReg(Src0RM); 2026 Src0RM = legalizeToReg(Src0RM);
2013 Variable *T = makeReg(Dest->getType()); 2027 Variable *T = makeReg(Dest->getType());
2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2028 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2015 _movp(Dest, T); 2029 _movp(Dest, T);
2016 } else if (Dest->getType() == IceType_i64) { 2030 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2017 // Use a helper for converting floating-point values to 64-bit 2031 // Use a helper for converting floating-point values to 64-bit
2018 // integers. SSE2 appears to have no way to convert from xmm 2032 // integers. SSE2 appears to have no way to convert from xmm
2019 // registers to something like the edx:eax register pair, and 2033 // registers to something like the edx:eax register pair, and
2020 // gcc and clang both want to use x87 instructions complete with 2034 // gcc and clang both want to use x87 instructions complete with
2021 // temporary manipulation of the status word. This helper is 2035 // temporary manipulation of the status word. This helper is
2022 // not needed for x86-64. 2036 // not needed for x86-64.
2023 split64(Dest); 2037 split64(Dest);
2024 const SizeT MaxSrcs = 1; 2038 const SizeT MaxSrcs = 1;
2025 Type SrcType = Inst->getSrc(0)->getType(); 2039 Type SrcType = Inst->getSrc(0)->getType();
2026 InstCall *Call = 2040 InstCall *Call =
2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2041 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2028 : H_fptosi_f64_i64, 2042 : H_fptosi_f64_i64,
2029 Dest, MaxSrcs); 2043 Dest, MaxSrcs);
2030 Call->addArg(Inst->getSrc(0)); 2044 Call->addArg(Inst->getSrc(0));
2031 lowerCall(Call); 2045 lowerCall(Call);
2032 } else { 2046 } else {
2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2047 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2048 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2035 Variable *T_1 = makeReg(IceType_i32); 2049 Variable *T_1 = nullptr;
2050 if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
2051 T_1 = makeReg(IceType_i64);
2052 } else {
2053 assert(Dest->getType() != IceType_i64);
2054 T_1 = makeReg(IceType_i32);
2055 }
2056 // cvt() requires its integer argument to be a GPR.
2057 T_1->setWeightInfinite();
2036 Variable *T_2 = makeReg(Dest->getType()); 2058 Variable *T_2 = makeReg(Dest->getType());
2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2059 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2060 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2039 if (Dest->getType() == IceType_i1) 2061 if (Dest->getType() == IceType_i1)
2040 _and(T_2, Ctx->getConstantInt1(1)); 2062 _and(T_2, Ctx->getConstantInt1(1));
2041 _mov(Dest, T_2); 2063 _mov(Dest, T_2);
2042 } 2064 }
2043 break; 2065 break;
2044 case InstCast::Fptoui: 2066 case InstCast::Fptoui:
2045 if (isVectorType(Dest->getType())) { 2067 if (isVectorType(Dest->getType())) {
2046 assert(Dest->getType() == IceType_v4i32 && 2068 assert(Dest->getType() == IceType_v4i32 &&
2047 Inst->getSrc(0)->getType() == IceType_v4f32); 2069 Inst->getSrc(0)->getType() == IceType_v4f32);
2048 const SizeT MaxSrcs = 1; 2070 const SizeT MaxSrcs = 1;
2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2071 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2050 Call->addArg(Inst->getSrc(0)); 2072 Call->addArg(Inst->getSrc(0));
2051 lowerCall(Call); 2073 lowerCall(Call);
2052 } else if (Dest->getType() == IceType_i64 || 2074 } else if (Dest->getType() == IceType_i64 ||
2053 Dest->getType() == IceType_i32) { 2075 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2054 // Use a helper for both x86-32 and x86-64. 2076 // Use a helper for both x86-32 and x86-64.
2055 split64(Dest); 2077 if (!Traits::Is64Bit)
2078 split64(Dest);
2056 const SizeT MaxSrcs = 1; 2079 const SizeT MaxSrcs = 1;
2057 Type DestType = Dest->getType(); 2080 Type DestType = Dest->getType();
2058 Type SrcType = Inst->getSrc(0)->getType(); 2081 Type SrcType = Inst->getSrc(0)->getType();
2059 IceString TargetString; 2082 IceString TargetString;
2060 if (isInt32Asserting32Or64(DestType)) { 2083 if (Traits::Is64Bit) {
2084 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2085 : H_fptoui_f64_i64;
2086 } else if (isInt32Asserting32Or64(DestType)) {
2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2087 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2062 : H_fptoui_f64_i32; 2088 : H_fptoui_f64_i32;
2063 } else { 2089 } else {
2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2090 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2065 : H_fptoui_f64_i64; 2091 : H_fptoui_f64_i64;
2066 } 2092 }
2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2093 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2068 Call->addArg(Inst->getSrc(0)); 2094 Call->addArg(Inst->getSrc(0));
2069 lowerCall(Call); 2095 lowerCall(Call);
2070 return; 2096 return;
2071 } else { 2097 } else {
2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2099 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2074 Variable *T_1 = makeReg(IceType_i32); 2100 assert(Dest->getType() != IceType_i64);
2101 Variable *T_1 = nullptr;
2102 if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
2103 T_1 = makeReg(IceType_i64);
2104 } else {
2105 assert(Dest->getType() != IceType_i32);
2106 T_1 = makeReg(IceType_i32);
2107 }
2108 T_1->setWeightInfinite();
2075 Variable *T_2 = makeReg(Dest->getType()); 2109 Variable *T_2 = makeReg(Dest->getType());
2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2110 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2111 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2078 if (Dest->getType() == IceType_i1) 2112 if (Dest->getType() == IceType_i1)
2079 _and(T_2, Ctx->getConstantInt1(1)); 2113 _and(T_2, Ctx->getConstantInt1(1));
2080 _mov(Dest, T_2); 2114 _mov(Dest, T_2);
2081 } 2115 }
2082 break; 2116 break;
2083 case InstCast::Sitofp: 2117 case InstCast::Sitofp:
2084 if (isVectorType(Dest->getType())) { 2118 if (isVectorType(Dest->getType())) {
2085 assert(Dest->getType() == IceType_v4f32 && 2119 assert(Dest->getType() == IceType_v4f32 &&
2086 Inst->getSrc(0)->getType() == IceType_v4i32); 2120 Inst->getSrc(0)->getType() == IceType_v4i32);
2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2121 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2122 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2089 Src0RM = legalizeToReg(Src0RM); 2123 Src0RM = legalizeToReg(Src0RM);
2090 Variable *T = makeReg(Dest->getType()); 2124 Variable *T = makeReg(Dest->getType());
2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2125 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2092 _movp(Dest, T); 2126 _movp(Dest, T);
2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2127 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2094 // Use a helper for x86-32. 2128 // Use a helper for x86-32.
2095 const SizeT MaxSrcs = 1; 2129 const SizeT MaxSrcs = 1;
2096 Type DestType = Dest->getType(); 2130 Type DestType = Dest->getType();
2097 InstCall *Call = 2131 InstCall *Call =
2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2132 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2099 : H_sitofp_i64_f64, 2133 : H_sitofp_i64_f64,
2100 Dest, MaxSrcs); 2134 Dest, MaxSrcs);
2101 // TODO: Call the correct compiler-rt helper function. 2135 // TODO: Call the correct compiler-rt helper function.
2102 Call->addArg(Inst->getSrc(0)); 2136 Call->addArg(Inst->getSrc(0));
2103 lowerCall(Call); 2137 lowerCall(Call);
2104 return; 2138 return;
2105 } else { 2139 } else {
2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2140 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2107 // Sign-extend the operand. 2141 // Sign-extend the operand.
2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2142 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2109 Variable *T_1 = makeReg(IceType_i32); 2143 Variable *T_1 = nullptr;
2144 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2145 T_1 = makeReg(IceType_i64);
2146 } else {
2147 assert(Src0RM->getType() != IceType_i64);
2148 T_1 = makeReg(IceType_i32);
2149 }
2150 T_1->setWeightInfinite();
2110 Variable *T_2 = makeReg(Dest->getType()); 2151 Variable *T_2 = makeReg(Dest->getType());
2111 if (Src0RM->getType() == IceType_i32) 2152 if (Src0RM->getType() == T_1->getType())
2112 _mov(T_1, Src0RM); 2153 _mov(T_1, Src0RM);
2113 else 2154 else
2114 _movsx(T_1, Src0RM); 2155 _movsx(T_1, Src0RM);
2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2116 _mov(Dest, T_2); 2157 _mov(Dest, T_2);
2117 } 2158 }
2118 break; 2159 break;
2119 case InstCast::Uitofp: { 2160 case InstCast::Uitofp: {
2120 Operand *Src0 = Inst->getSrc(0); 2161 Operand *Src0 = Inst->getSrc(0);
2121 if (isVectorType(Src0->getType())) { 2162 if (isVectorType(Src0->getType())) {
2122 assert(Dest->getType() == IceType_v4f32 && 2163 assert(Dest->getType() == IceType_v4f32 &&
2123 Src0->getType() == IceType_v4i32); 2164 Src0->getType() == IceType_v4i32);
2124 const SizeT MaxSrcs = 1; 2165 const SizeT MaxSrcs = 1;
2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2166 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2126 Call->addArg(Src0); 2167 Call->addArg(Src0);
2127 lowerCall(Call); 2168 lowerCall(Call);
2128 } else if (Src0->getType() == IceType_i64 || 2169 } else if (Src0->getType() == IceType_i64 ||
2129 Src0->getType() == IceType_i32) { 2170 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2130 // Use a helper for x86-32 and x86-64. Also use a helper for 2171 // Use a helper for x86-32 and x86-64. Also use a helper for
2131 // i32 on x86-32. 2172 // i32 on x86-32.
2132 const SizeT MaxSrcs = 1; 2173 const SizeT MaxSrcs = 1;
2133 Type DestType = Dest->getType(); 2174 Type DestType = Dest->getType();
2134 IceString TargetString; 2175 IceString TargetString;
2135 if (isInt32Asserting32Or64(Src0->getType())) { 2176 if (isInt32Asserting32Or64(Src0->getType())) {
2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2177 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2137 : H_uitofp_i32_f64; 2178 : H_uitofp_i32_f64;
2138 } else { 2179 } else {
2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2180 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2140 : H_uitofp_i64_f64; 2181 : H_uitofp_i64_f64;
2141 } 2182 }
2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2183 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2143 Call->addArg(Src0); 2184 Call->addArg(Src0);
2144 lowerCall(Call); 2185 lowerCall(Call);
2145 return; 2186 return;
2146 } else { 2187 } else {
2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2188 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2148 // Zero-extend the operand. 2189 // Zero-extend the operand.
2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2190 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2150 Variable *T_1 = makeReg(IceType_i32); 2191 Variable *T_1 = nullptr;
2192 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2193 T_1 = makeReg(IceType_i64);
2194 } else {
2195 assert(Src0RM->getType() != IceType_i64);
2196 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2197 T_1 = makeReg(IceType_i32);
2198 }
2199 T_1->setWeightInfinite();
2151 Variable *T_2 = makeReg(Dest->getType()); 2200 Variable *T_2 = makeReg(Dest->getType());
2152 if (Src0RM->getType() == IceType_i32) 2201 if (Src0RM->getType() == T_1->getType())
2153 _mov(T_1, Src0RM); 2202 _mov(T_1, Src0RM);
2154 else 2203 else
2155 _movzx(T_1, Src0RM); 2204 _movzx(T_1, Src0RM);
2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2205 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2157 _mov(Dest, T_2); 2206 _mov(Dest, T_2);
2158 } 2207 }
2159 break; 2208 break;
2160 } 2209 }
2161 case InstCast::Bitcast: { 2210 case InstCast::Bitcast: {
2162 Operand *Src0 = Inst->getSrc(0); 2211 Operand *Src0 = Inst->getSrc(0);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
2198 typename Traits::SpillVariable *SpillVar = 2247 typename Traits::SpillVariable *SpillVar =
2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2248 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2200 SpillVar->setLinkedTo(Dest); 2249 SpillVar->setLinkedTo(Dest);
2201 Variable *Spill = SpillVar; 2250 Variable *Spill = SpillVar;
2202 Spill->setWeight(RegWeight::Zero); 2251 Spill->setWeight(RegWeight::Zero);
2203 _mov(T, Src0RM); 2252 _mov(T, Src0RM);
2204 _mov(Spill, T); 2253 _mov(Spill, T);
2205 _mov(Dest, Spill); 2254 _mov(Dest, Spill);
2206 } break; 2255 } break;
2207 case IceType_i64: { 2256 case IceType_i64: {
2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2257 assert(Src0->getType() == IceType_f64);
2209 assert(Src0RM->getType() == IceType_f64); 2258 if (Traits::Is64Bit) {
2210 // a.i64 = bitcast b.f64 ==> 2259 // Movd requires its fp argument (in this case, the bitcast source) to
2211 // s.f64 = spill b.f64 2260 // be an xmm register.
2212 // t_lo.i32 = lo(s.f64) 2261 Variable *Src0R = legalizeToReg(Src0);
2213 // a_lo.i32 = t_lo.i32 2262 Variable *T = makeReg(IceType_i64);
2214 // t_hi.i32 = hi(s.f64) 2263 _movd(T, Src0R);
2215 // a_hi.i32 = t_hi.i32 2264 _mov(Dest, T);
2216 Operand *SpillLo, *SpillHi; 2265 } else {
2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2266 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2267 // a.i64 = bitcast b.f64 ==>
2268 // s.f64 = spill b.f64
2269 // t_lo.i32 = lo(s.f64)
2270 // a_lo.i32 = t_lo.i32
2271 // t_hi.i32 = hi(s.f64)
2272 // a_hi.i32 = t_hi.i32
2273 Operand *SpillLo, *SpillHi;
2274 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2275 typename Traits::SpillVariable *SpillVar =
2276 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2277 SpillVar->setLinkedTo(Src0Var);
2278 Variable *Spill = SpillVar;
2279 Spill->setWeight(RegWeight::Zero);
2280 _movq(Spill, Src0RM);
2281 SpillLo = Traits::VariableSplit::create(Func, Spill,
2282 Traits::VariableSplit::Low);
2283 SpillHi = Traits::VariableSplit::create(Func, Spill,
2284 Traits::VariableSplit::High);
2285 } else {
2286 SpillLo = loOperand(Src0RM);
2287 SpillHi = hiOperand(Src0RM);
2288 }
2289
2290 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2291 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2292 Variable *T_Lo = makeReg(IceType_i32);
2293 Variable *T_Hi = makeReg(IceType_i32);
2294
2295 _mov(T_Lo, SpillLo);
2296 _mov(DestLo, T_Lo);
2297 _mov(T_Hi, SpillHi);
2298 _mov(DestHi, T_Hi);
2299 }
2300 } break;
2301 case IceType_f64: {
2302 assert(Src0->getType() == IceType_i64);
2303 if (Traits::Is64Bit) {
2304 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2305 Variable *T = makeReg(IceType_f64);
2306 // Movd requires its fp argument (in this case, the bitcast destination)
2307 // to be an xmm register.
2308 T->setWeightInfinite();
2309 _movd(T, Src0RM);
2310 _mov(Dest, T);
2311 } else {
2312 Src0 = legalize(Src0);
2313 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2314 Variable *T = Func->makeVariable(Dest->getType());
2315 _movq(T, Src0);
2316 _movq(Dest, T);
2317 break;
2318 }
2319 // a.f64 = bitcast b.i64 ==>
2320 // t_lo.i32 = b_lo.i32
2321 // FakeDef(s.f64)
2322 // lo(s.f64) = t_lo.i32
2323 // t_hi.i32 = b_hi.i32
2324 // hi(s.f64) = t_hi.i32
2325 // a.f64 = s.f64
2218 typename Traits::SpillVariable *SpillVar = 2326 typename Traits::SpillVariable *SpillVar =
2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2327 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2220 SpillVar->setLinkedTo(Src0Var); 2328 SpillVar->setLinkedTo(Dest);
2221 Variable *Spill = SpillVar; 2329 Variable *Spill = SpillVar;
2222 Spill->setWeight(RegWeight::Zero); 2330 Spill->setWeight(RegWeight::Zero);
2223 _movq(Spill, Src0RM); 2331
2224 SpillLo = Traits::VariableSplit::create(Func, Spill, 2332 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2225 Traits::VariableSplit::Low); 2333 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2226 SpillHi = Traits::VariableSplit::create(Func, Spill, 2334 Func, Spill, Traits::VariableSplit::Low);
2227 Traits::VariableSplit::High); 2335 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2228 } else { 2336 Func, Spill, Traits::VariableSplit::High);
2229 SpillLo = loOperand(Src0RM); 2337 _mov(T_Lo, loOperand(Src0));
2230 SpillHi = hiOperand(Src0RM); 2338 // Technically, the Spill is defined after the _store happens, but
2339 // SpillLo is considered a "use" of Spill so define Spill before it
2340 // is used.
2341 Context.insert(InstFakeDef::create(Func, Spill));
2342 _store(T_Lo, SpillLo);
2343 _mov(T_Hi, hiOperand(Src0));
2344 _store(T_Hi, SpillHi);
2345 _movq(Dest, Spill);
2231 } 2346 }
2232
2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2235 Variable *T_Lo = makeReg(IceType_i32);
2236 Variable *T_Hi = makeReg(IceType_i32);
2237
2238 _mov(T_Lo, SpillLo);
2239 _mov(DestLo, T_Lo);
2240 _mov(T_Hi, SpillHi);
2241 _mov(DestHi, T_Hi);
2242 } break;
2243 case IceType_f64: {
2244 Src0 = legalize(Src0);
2245 assert(Src0->getType() == IceType_i64);
2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2247 Variable *T = Func->makeVariable(Dest->getType());
2248 _movq(T, Src0);
2249 _movq(Dest, T);
2250 break;
2251 }
2252 // a.f64 = bitcast b.i64 ==>
2253 // t_lo.i32 = b_lo.i32
2254 // FakeDef(s.f64)
2255 // lo(s.f64) = t_lo.i32
2256 // t_hi.i32 = b_hi.i32
2257 // hi(s.f64) = t_hi.i32
2258 // a.f64 = s.f64
2259 typename Traits::SpillVariable *SpillVar =
2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2261 SpillVar->setLinkedTo(Dest);
2262 Variable *Spill = SpillVar;
2263 Spill->setWeight(RegWeight::Zero);
2264
2265 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2267 Func, Spill, Traits::VariableSplit::Low);
2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2269 Func, Spill, Traits::VariableSplit::High);
2270 _mov(T_Lo, loOperand(Src0));
2271 // Technically, the Spill is defined after the _store happens, but
2272 // SpillLo is considered a "use" of Spill so define Spill before it
2273 // is used.
2274 Context.insert(InstFakeDef::create(Func, Spill));
2275 _store(T_Lo, SpillLo);
2276 _mov(T_Hi, hiOperand(Src0));
2277 _store(T_Hi, SpillHi);
2278 _movq(Dest, Spill);
2279 } break; 2347 } break;
2280 case IceType_v8i1: { 2348 case IceType_v8i1: {
2281 assert(Src0->getType() == IceType_i8); 2349 assert(Src0->getType() == IceType_i8);
2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); 2350 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); 2351 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2284 // Arguments to functions are required to be at least 32 bits wide. 2352 // Arguments to functions are required to be at least 32 bits wide.
2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); 2353 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2286 Call->addArg(Src0AsI32); 2354 Call->addArg(Src0AsI32);
2287 lowerCall(Call); 2355 lowerCall(Call);
2288 } break; 2356 } break;
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after
2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2676 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2609 _pxor(T, MinusOne); 2677 _pxor(T, MinusOne);
2610 } break; 2678 } break;
2611 } 2679 }
2612 2680
2613 _movp(Dest, T); 2681 _movp(Dest, T);
2614 eliminateNextVectorSextInstruction(Dest); 2682 eliminateNextVectorSextInstruction(Dest);
2615 return; 2683 return;
2616 } 2684 }
2617 2685
2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2686 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2619 if (Src0->getType() == IceType_i64) { 2687 lowerIcmp64(Inst);
2620 InstIcmp::ICond Condition = Inst->getCondition();
2621 size_t Index = static_cast<size_t>(Condition);
2622 assert(Index < Traits::TableIcmp64Size);
2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2627 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2628 Constant *One = Ctx->getConstantInt32(1);
2629 typename Traits::Insts::Label *LabelFalse =
2630 Traits::Insts::Label::create(Func, this);
2631 typename Traits::Insts::Label *LabelTrue =
2632 Traits::Insts::Label::create(Func, this);
2633 _mov(Dest, One);
2634 _cmp(Src0HiRM, Src1HiRI);
2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2639 _cmp(Src0LoRM, Src1LoRI);
2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2641 Context.insert(LabelFalse);
2642 _mov_nonkillable(Dest, Zero);
2643 Context.insert(LabelTrue);
2644 return; 2688 return;
2645 } 2689 }
2646 2690
2647 // cmp b, c 2691 // cmp b, c
2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2692 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2649 _cmp(Src0RM, Src1); 2693 _cmp(Src0RM, Src1);
2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); 2694 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
2651 } 2695 }
2652 2696
2697 template <typename Machine>
2698 template <typename T>
2699 typename std::enable_if<!T::Is64Bit, void>::type
2700 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2702 Operand *Src0 = legalize(Inst->getSrc(0));
2703 Operand *Src1 = legalize(Inst->getSrc(1));
2704 Variable *Dest = Inst->getDest();
2705 InstIcmp::ICond Condition = Inst->getCondition();
2706 size_t Index = static_cast<size_t>(Condition);
2707 assert(Index < Traits::TableIcmp64Size);
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2712 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2713 Constant *One = Ctx->getConstantInt32(1);
2714 typename Traits::Insts::Label *LabelFalse =
2715 Traits::Insts::Label::create(Func, this);
2716 typename Traits::Insts::Label *LabelTrue =
2717 Traits::Insts::Label::create(Func, this);
2718 _mov(Dest, One);
2719 _cmp(Src0HiRM, Src1HiRI);
2720 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2721 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2722 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2723 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2724 _cmp(Src0LoRM, Src1LoRI);
2725 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2726 Context.insert(LabelFalse);
2727 _mov_nonkillable(Dest, Zero);
2728 Context.insert(LabelTrue);
2729 }
2730
2653 template <class Machine> 2731 template <class Machine>
2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 2732 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2733 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2734 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2657 ConstantInteger32 *ElementIndex = 2735 ConstantInteger32 *ElementIndex =
2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 2736 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2659 // Only constant indices are allowed in PNaCl IR. 2737 // Only constant indices are allowed in PNaCl IR.
2660 assert(ElementIndex); 2738 assert(ElementIndex);
2661 unsigned Index = ElementIndex->getValue(); 2739 unsigned Index = ElementIndex->getValue();
2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2740 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
2841 } 2919 }
2842 case Intrinsics::AtomicLoad: { 2920 case Intrinsics::AtomicLoad: {
2843 // We require the memory address to be naturally aligned. 2921 // We require the memory address to be naturally aligned.
2844 // Given that is the case, then normal loads are atomic. 2922 // Given that is the case, then normal loads are atomic.
2845 if (!Intrinsics::isMemoryOrderValid( 2923 if (!Intrinsics::isMemoryOrderValid(
2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 2924 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2847 Func->setError("Unexpected memory ordering for AtomicLoad"); 2925 Func->setError("Unexpected memory ordering for AtomicLoad");
2848 return; 2926 return;
2849 } 2927 }
2850 Variable *Dest = Instr->getDest(); 2928 Variable *Dest = Instr->getDest();
2851 if (Dest->getType() == IceType_i64) { 2929 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2852 // Follow what GCC does and use a movq instead of what lowerLoad() 2930 // Follow what GCC does and use a movq instead of what lowerLoad()
2853 // normally does (split the load into two). 2931 // normally does (split the load into two).
2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2932 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2855 // can't happen anyway, since this is x86-32 and integer arithmetic only 2933 // can't happen anyway, since this is x86-32 and integer arithmetic only
2856 // happens on 32-bit quantities. 2934 // happens on 32-bit quantities.
2857 Variable *T = makeReg(IceType_f64); 2935 Variable *T = makeReg(IceType_f64);
2858 typename Traits::X86OperandMem *Addr = 2936 typename Traits::X86OperandMem *Addr =
2859 formMemoryOperand(Instr->getArg(0), IceType_f64); 2937 formMemoryOperand(Instr->getArg(0), IceType_f64);
2860 _movq(T, Addr); 2938 _movq(T, Addr);
2861 // Then cast the bits back out of the XMM register to the i64 Dest. 2939 // Then cast the bits back out of the XMM register to the i64 Dest.
(...skipping 29 matching lines...) Expand all
2891 if (!Intrinsics::isMemoryOrderValid( 2969 if (!Intrinsics::isMemoryOrderValid(
2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 2970 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2893 Func->setError("Unexpected memory ordering for AtomicStore"); 2971 Func->setError("Unexpected memory ordering for AtomicStore");
2894 return; 2972 return;
2895 } 2973 }
2896 // We require the memory address to be naturally aligned. 2974 // We require the memory address to be naturally aligned.
2897 // Given that is the case, then normal stores are atomic. 2975 // Given that is the case, then normal stores are atomic.
2898 // Add a fence after the store to make it visible. 2976 // Add a fence after the store to make it visible.
2899 Operand *Value = Instr->getArg(0); 2977 Operand *Value = Instr->getArg(0);
2900 Operand *Ptr = Instr->getArg(1); 2978 Operand *Ptr = Instr->getArg(1);
2901 if (Value->getType() == IceType_i64) { 2979 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
2902 // Use a movq instead of what lowerStore() normally does 2980 // Use a movq instead of what lowerStore() normally does
2903 // (split the store into two), following what GCC does. 2981 // (split the store into two), following what GCC does.
2904 // Cast the bits from int -> to an xmm register first. 2982 // Cast the bits from int -> to an xmm register first.
2905 Variable *T = makeReg(IceType_f64); 2983 Variable *T = makeReg(IceType_f64);
2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 2984 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2907 lowerCast(Cast); 2985 lowerCast(Cast);
2908 // Then store XMM w/ a movq. 2986 // Then store XMM w/ a movq.
2909 typename Traits::X86OperandMem *Addr = 2987 typename Traits::X86OperandMem *Addr =
2910 formMemoryOperand(Ptr, IceType_f64); 2988 formMemoryOperand(Ptr, IceType_f64);
2911 _storeq(T, Addr); 2989 _storeq(T, Addr);
2912 _mfence(); 2990 _mfence();
2913 return; 2991 return;
2914 } 2992 }
2915 InstStore *Store = InstStore::create(Func, Value, Ptr); 2993 InstStore *Store = InstStore::create(Func, Value, Ptr);
2916 lowerStore(Store); 2994 lowerStore(Store);
2917 _mfence(); 2995 _mfence();
2918 return; 2996 return;
2919 } 2997 }
2920 case Intrinsics::Bswap: { 2998 case Intrinsics::Bswap: {
2921 Variable *Dest = Instr->getDest(); 2999 Variable *Dest = Instr->getDest();
2922 Operand *Val = Instr->getArg(0); 3000 Operand *Val = Instr->getArg(0);
2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the 3001 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2924 // argument must be a register. Use rotate left for 16-bit bswap. 3002 // argument must be a register. Use rotate left for 16-bit bswap.
2925 if (Val->getType() == IceType_i64) { 3003 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2926 Val = legalizeUndef(Val); 3004 Val = legalizeUndef(Val);
2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); 3005 Variable *T_Lo = legalizeToReg(loOperand(Val));
2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); 3006 Variable *T_Hi = legalizeToReg(hiOperand(Val));
2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3007 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3008 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2931 _bswap(T_Lo); 3009 _bswap(T_Lo);
2932 _bswap(T_Hi); 3010 _bswap(T_Hi);
2933 _mov(DestLo, T_Hi); 3011 _mov(DestLo, T_Hi);
2934 _mov(DestHi, T_Lo); 3012 _mov(DestHi, T_Lo);
2935 } else if (Val->getType() == IceType_i32) { 3013 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
3014 Val->getType() == IceType_i32) {
2936 Variable *T = legalizeToReg(Val); 3015 Variable *T = legalizeToReg(Val);
2937 _bswap(T); 3016 _bswap(T);
2938 _mov(Dest, T); 3017 _mov(Dest, T);
2939 } else { 3018 } else {
2940 assert(Val->getType() == IceType_i16); 3019 assert(Val->getType() == IceType_i16);
2941 Constant *Eight = Ctx->getConstantInt16(8); 3020 Constant *Eight = Ctx->getConstantInt16(8);
2942 Variable *T = nullptr; 3021 Variable *T = nullptr;
2943 Val = legalize(Val); 3022 Val = legalize(Val);
2944 _mov(T, Val); 3023 _mov(T, Val);
2945 _rol(T, Eight); 3024 _rol(T, Eight);
2946 _mov(Dest, T); 3025 _mov(Dest, T);
2947 } 3026 }
2948 return; 3027 return;
2949 } 3028 }
2950 case Intrinsics::Ctpop: { 3029 case Intrinsics::Ctpop: {
2951 Variable *Dest = Instr->getDest(); 3030 Variable *Dest = Instr->getDest();
3031 Variable *T = nullptr;
2952 Operand *Val = Instr->getArg(0); 3032 Operand *Val = Instr->getArg(0);
2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3033 Type ValTy = Val->getType();
2954 ? H_call_ctpop_i32 3034 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
2955 : H_call_ctpop_i64, 3035
2956 Dest, 1); 3036 if (!Traits::Is64Bit) {
3037 T = Dest;
3038 } else {
3039 T = makeReg(IceType_i64);
3040 if (ValTy == IceType_i32) {
3041 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
3042 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
3043 // ensure we will not have any bits set on Val's upper 32 bits.
3044 Variable *V = makeReg(IceType_i64);
3045 _movzx(V, Val);
3046 Val = V;
3047 }
3048 ValTy = IceType_i64;
3049 }
3050
3051 InstCall *Call = makeHelperCall(
3052 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
2957 Call->addArg(Val); 3053 Call->addArg(Val);
2958 lowerCall(Call); 3054 lowerCall(Call);
2959 // The popcount helpers always return 32-bit values, while the intrinsic's 3055 // The popcount helpers always return 32-bit values, while the intrinsic's
2960 // signature matches the native POPCNT instruction and fills a 64-bit reg 3056 // signature matches the native POPCNT instruction and fills a 64-bit reg
2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case 3057 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2962 // the user doesn't do that in the IR. If the user does that in the IR, 3058 // the user doesn't do that in the IR. If the user does that in the IR,
2963 // then this zero'ing instruction is dead and gets optimized out. 3059 // then this zero'ing instruction is dead and gets optimized out.
2964 if (Val->getType() == IceType_i64) { 3060 if (!Traits::Is64Bit) {
2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3061 assert(T == Dest);
2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3062 if (Val->getType() == IceType_i64) {
2967 _mov(DestHi, Zero); 3063 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3064 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3065 _mov(DestHi, Zero);
3066 }
3067 } else {
3068 assert(Val->getType() == IceType_i64);
3069 // T is 64 bit. It needs to be copied to dest. We need to:
3070 //
3071 // T_1.32 = trunc T.64 to i32
3072 // T_2.64 = zext T_1.32 to i64
3073 // Dest.<<right_size>> = T_2.<<right_size>>
3074 //
3075 // which ensures the upper 32 bits will always be cleared. Just doing a
3076 //
3077 // mov Dest.32 = trunc T.32 to i32
3078 //
3079 // is dangerous because there's a chance the compiler will optimize this
3080 // copy out. To use _movzx we need two new registers (one 32-, and
3081 // another 64-bit wide.)
3082 Variable *T_1 = makeReg(IceType_i32);
3083 _mov(T_1, T);
3084 Variable *T_2 = makeReg(IceType_i64);
3085 _movzx(T_2, T_1);
3086 _mov(Dest, T_2);
2968 } 3087 }
2969 return; 3088 return;
2970 } 3089 }
2971 case Intrinsics::Ctlz: { 3090 case Intrinsics::Ctlz: {
2972 // The "is zero undef" parameter is ignored and we always return 3091 // The "is zero undef" parameter is ignored and we always return
2973 // a well-defined value. 3092 // a well-defined value.
2974 Operand *Val = legalize(Instr->getArg(0)); 3093 Operand *Val = legalize(Instr->getArg(0));
2975 Operand *FirstVal; 3094 Operand *FirstVal;
2976 Operand *SecondVal = nullptr; 3095 Operand *SecondVal = nullptr;
2977 if (Val->getType() == IceType_i64) { 3096 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2978 FirstVal = loOperand(Val); 3097 FirstVal = loOperand(Val);
2979 SecondVal = hiOperand(Val); 3098 SecondVal = hiOperand(Val);
2980 } else { 3099 } else {
2981 FirstVal = Val; 3100 FirstVal = Val;
2982 } 3101 }
2983 const bool IsCttz = false; 3102 const bool IsCttz = false;
2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3103 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2985 SecondVal); 3104 SecondVal);
2986 return; 3105 return;
2987 } 3106 }
2988 case Intrinsics::Cttz: { 3107 case Intrinsics::Cttz: {
2989 // The "is zero undef" parameter is ignored and we always return 3108 // The "is zero undef" parameter is ignored and we always return
2990 // a well-defined value. 3109 // a well-defined value.
2991 Operand *Val = legalize(Instr->getArg(0)); 3110 Operand *Val = legalize(Instr->getArg(0));
2992 Operand *FirstVal; 3111 Operand *FirstVal;
2993 Operand *SecondVal = nullptr; 3112 Operand *SecondVal = nullptr;
2994 if (Val->getType() == IceType_i64) { 3113 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2995 FirstVal = hiOperand(Val); 3114 FirstVal = hiOperand(Val);
2996 SecondVal = loOperand(Val); 3115 SecondVal = loOperand(Val);
2997 } else { 3116 } else {
2998 FirstVal = Val; 3117 FirstVal = Val;
2999 } 3118 }
3000 const bool IsCttz = true; 3119 const bool IsCttz = true;
3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3120 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3002 SecondVal); 3121 SecondVal);
3003 return; 3122 return;
3004 } 3123 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3098 Func->setError("Should not be lowering UnknownIntrinsic"); 3217 Func->setError("Should not be lowering UnknownIntrinsic");
3099 return; 3218 return;
3100 } 3219 }
3101 return; 3220 return;
3102 } 3221 }
3103 3222
3104 template <class Machine> 3223 template <class Machine>
3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3224 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3106 Operand *Ptr, Operand *Expected, 3225 Operand *Ptr, Operand *Expected,
3107 Operand *Desired) { 3226 Operand *Desired) {
3108 if (Expected->getType() == IceType_i64) { 3227 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
3109 // Reserve the pre-colored registers first, before adding any more 3228 // Reserve the pre-colored registers first, before adding any more
3110 // infinite-weight variables from formMemoryOperand's legalization. 3229 // infinite-weight variables from formMemoryOperand's legalization.
3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3230 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3231 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3232 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3233 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3115 _mov(T_eax, loOperand(Expected)); 3234 _mov(T_eax, loOperand(Expected));
3116 _mov(T_edx, hiOperand(Expected)); 3235 _mov(T_edx, hiOperand(Expected));
3117 _mov(T_ebx, loOperand(Desired)); 3236 _mov(T_ebx, loOperand(Desired));
3118 _mov(T_ecx, hiOperand(Desired)); 3237 _mov(T_ecx, hiOperand(Desired));
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3335 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3217 Operand *Ptr, Operand *Val) { 3336 Operand *Ptr, Operand *Val) {
3218 bool NeedsCmpxchg = false; 3337 bool NeedsCmpxchg = false;
3219 LowerBinOp Op_Lo = nullptr; 3338 LowerBinOp Op_Lo = nullptr;
3220 LowerBinOp Op_Hi = nullptr; 3339 LowerBinOp Op_Hi = nullptr;
3221 switch (Operation) { 3340 switch (Operation) {
3222 default: 3341 default:
3223 Func->setError("Unknown AtomicRMW operation"); 3342 Func->setError("Unknown AtomicRMW operation");
3224 return; 3343 return;
3225 case Intrinsics::AtomicAdd: { 3344 case Intrinsics::AtomicAdd: {
3226 if (Dest->getType() == IceType_i64) { 3345 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3227 // All the fall-through paths must set this to true, but use this 3346 // All the fall-through paths must set this to true, but use this
3228 // for asserting. 3347 // for asserting.
3229 NeedsCmpxchg = true; 3348 NeedsCmpxchg = true;
3230 Op_Lo = &TargetX86Base<Machine>::_add; 3349 Op_Lo = &TargetX86Base<Machine>::_add;
3231 Op_Hi = &TargetX86Base<Machine>::_adc; 3350 Op_Hi = &TargetX86Base<Machine>::_adc;
3232 break; 3351 break;
3233 } 3352 }
3234 typename Traits::X86OperandMem *Addr = 3353 typename Traits::X86OperandMem *Addr =
3235 formMemoryOperand(Ptr, Dest->getType()); 3354 formMemoryOperand(Ptr, Dest->getType());
3236 const bool Locked = true; 3355 const bool Locked = true;
3237 Variable *T = nullptr; 3356 Variable *T = nullptr;
3238 _mov(T, Val); 3357 _mov(T, Val);
3239 _xadd(Addr, T, Locked); 3358 _xadd(Addr, T, Locked);
3240 _mov(Dest, T); 3359 _mov(Dest, T);
3241 return; 3360 return;
3242 } 3361 }
3243 case Intrinsics::AtomicSub: { 3362 case Intrinsics::AtomicSub: {
3244 if (Dest->getType() == IceType_i64) { 3363 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3245 NeedsCmpxchg = true; 3364 NeedsCmpxchg = true;
3246 Op_Lo = &TargetX86Base<Machine>::_sub; 3365 Op_Lo = &TargetX86Base<Machine>::_sub;
3247 Op_Hi = &TargetX86Base<Machine>::_sbb; 3366 Op_Hi = &TargetX86Base<Machine>::_sbb;
3248 break; 3367 break;
3249 } 3368 }
3250 typename Traits::X86OperandMem *Addr = 3369 typename Traits::X86OperandMem *Addr =
3251 formMemoryOperand(Ptr, Dest->getType()); 3370 formMemoryOperand(Ptr, Dest->getType());
3252 const bool Locked = true; 3371 const bool Locked = true;
3253 Variable *T = nullptr; 3372 Variable *T = nullptr;
3254 _mov(T, Val); 3373 _mov(T, Val);
(...skipping 16 matching lines...) Expand all
3271 NeedsCmpxchg = true; 3390 NeedsCmpxchg = true;
3272 Op_Lo = &TargetX86Base<Machine>::_and; 3391 Op_Lo = &TargetX86Base<Machine>::_and;
3273 Op_Hi = &TargetX86Base<Machine>::_and; 3392 Op_Hi = &TargetX86Base<Machine>::_and;
3274 break; 3393 break;
3275 case Intrinsics::AtomicXor: 3394 case Intrinsics::AtomicXor:
3276 NeedsCmpxchg = true; 3395 NeedsCmpxchg = true;
3277 Op_Lo = &TargetX86Base<Machine>::_xor; 3396 Op_Lo = &TargetX86Base<Machine>::_xor;
3278 Op_Hi = &TargetX86Base<Machine>::_xor; 3397 Op_Hi = &TargetX86Base<Machine>::_xor;
3279 break; 3398 break;
3280 case Intrinsics::AtomicExchange: 3399 case Intrinsics::AtomicExchange:
3281 if (Dest->getType() == IceType_i64) { 3400 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3282 NeedsCmpxchg = true; 3401 NeedsCmpxchg = true;
3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3402 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3284 // just need to be moved to the ecx and ebx registers. 3403 // just need to be moved to the ecx and ebx registers.
3285 Op_Lo = nullptr; 3404 Op_Lo = nullptr;
3286 Op_Hi = nullptr; 3405 Op_Hi = nullptr;
3287 break; 3406 break;
3288 } 3407 }
3289 typename Traits::X86OperandMem *Addr = 3408 typename Traits::X86OperandMem *Addr =
3290 formMemoryOperand(Ptr, Dest->getType()); 3409 formMemoryOperand(Ptr, Dest->getType());
3291 Variable *T = nullptr; 3410 Variable *T = nullptr;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
3325 // .LABEL: 3444 // .LABEL:
3326 // mov <reg>, eax 3445 // mov <reg>, eax
3327 // op <reg>, [desired_adj] 3446 // op <reg>, [desired_adj]
3328 // lock cmpxchg [ptr], <reg> 3447 // lock cmpxchg [ptr], <reg>
3329 // jne .LABEL 3448 // jne .LABEL
3330 // mov <dest>, eax 3449 // mov <dest>, eax
3331 // 3450 //
3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3451 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3333 Val = legalize(Val); 3452 Val = legalize(Val);
3334 Type Ty = Val->getType(); 3453 Type Ty = Val->getType();
3335 if (Ty == IceType_i64) { 3454 if (!Traits::Is64Bit && Ty == IceType_i64) {
3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3455 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3456 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3457 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3339 _mov(T_eax, loOperand(Addr)); 3458 _mov(T_eax, loOperand(Addr));
3340 _mov(T_edx, hiOperand(Addr)); 3459 _mov(T_edx, hiOperand(Addr));
3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3460 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3461 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3343 typename Traits::Insts::Label *Label = 3462 typename Traits::Insts::Label *Label =
3344 Traits::Insts::Label::create(Func, this); 3463 Traits::Insts::Label::create(Func, this);
3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3464 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
3457 if (Cttz) { 3576 if (Cttz) {
3458 _mov(T_Dest, ThirtyTwo); 3577 _mov(T_Dest, ThirtyTwo);
3459 } else { 3578 } else {
3460 Constant *SixtyThree = Ctx->getConstantInt32(63); 3579 Constant *SixtyThree = Ctx->getConstantInt32(63);
3461 _mov(T_Dest, SixtyThree); 3580 _mov(T_Dest, SixtyThree);
3462 } 3581 }
3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3582 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3464 if (!Cttz) { 3583 if (!Cttz) {
3465 _xor(T_Dest, ThirtyOne); 3584 _xor(T_Dest, ThirtyOne);
3466 } 3585 }
3467 if (Ty == IceType_i32) { 3586 if (Traits::Is64Bit || Ty == IceType_i32) {
3468 _mov(Dest, T_Dest); 3587 _mov(Dest, T_Dest);
3469 return; 3588 return;
3470 } 3589 }
3471 _add(T_Dest, ThirtyTwo); 3590 _add(T_Dest, ThirtyTwo);
3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3591 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3592 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3474 // Will be using "test" on this, so we need a registerized variable. 3593 // Will be using "test" on this, so we need a registerized variable.
3475 Variable *SecondVar = legalizeToReg(SecondVal); 3594 Variable *SecondVar = legalizeToReg(SecondVal);
3476 Variable *T_Dest2 = makeReg(IceType_i32); 3595 Variable *T_Dest2 = makeReg(IceType_i32);
3477 if (Cttz) { 3596 if (Cttz) {
(...skipping 568 matching lines...) Expand 10 before | Expand all | Expand 10 after
4046 return; 4165 return;
4047 } 4166 }
4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4167 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4049 // But if SrcT is immediate, we might be able to do better, as 4168 // But if SrcT is immediate, we might be able to do better, as
4050 // the cmov instruction doesn't allow an immediate operand: 4169 // the cmov instruction doesn't allow an immediate operand:
4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4170 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4171 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4053 std::swap(SrcT, SrcF); 4172 std::swap(SrcT, SrcF);
4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4173 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4055 } 4174 }
4056 if (DestTy == IceType_i64) { 4175 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4057 SrcT = legalizeUndef(SrcT); 4176 SrcT = legalizeUndef(SrcT);
4058 SrcF = legalizeUndef(SrcF); 4177 SrcF = legalizeUndef(SrcF);
4059 // Set the low portion. 4178 // Set the low portion.
4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4179 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4061 Variable *TLo = nullptr; 4180 Variable *TLo = nullptr;
4062 Operand *SrcFLo = legalize(loOperand(SrcF)); 4181 Operand *SrcFLo = legalize(loOperand(SrcF));
4063 _mov(TLo, SrcFLo); 4182 _mov(TLo, SrcFLo);
4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4183 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4065 _cmov(TLo, SrcTLo, Cond); 4184 _cmov(TLo, SrcTLo, Cond);
4066 _mov(DestLo, TLo); 4185 _mov(DestLo, TLo);
4067 // Set the high portion. 4186 // Set the high portion.
4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4187 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4069 Variable *THi = nullptr; 4188 Variable *THi = nullptr;
4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); 4189 Operand *SrcFHi = legalize(hiOperand(SrcF));
4071 _mov(THi, SrcFHi); 4190 _mov(THi, SrcFHi);
4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); 4191 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4073 _cmov(THi, SrcTHi, Cond); 4192 _cmov(THi, SrcTHi, Cond);
4074 _mov(DestHi, THi); 4193 _mov(DestHi, THi);
4075 return; 4194 return;
4076 } 4195 }
4077 4196
4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4197 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4198 (Traits::Is64Bit && DestTy == IceType_i64));
4079 Variable *T = nullptr; 4199 Variable *T = nullptr;
4080 SrcF = legalize(SrcF); 4200 SrcF = legalize(SrcF);
4081 _mov(T, SrcF); 4201 _mov(T, SrcF);
4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4202 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4083 _cmov(T, SrcT, Cond); 4203 _cmov(T, SrcT, Cond);
4084 _mov(Dest, T); 4204 _mov(Dest, T);
4085 } 4205 }
4086 4206
4087 template <class Machine> 4207 template <class Machine>
4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4208 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4089 Operand *Value = Inst->getData(); 4209 Operand *Value = Inst->getData();
4090 Operand *Addr = Inst->getAddr(); 4210 Operand *Addr = Inst->getAddr();
4091 typename Traits::X86OperandMem *NewAddr = 4211 typename Traits::X86OperandMem *NewAddr =
4092 formMemoryOperand(Addr, Value->getType()); 4212 formMemoryOperand(Addr, Value->getType());
4093 Type Ty = NewAddr->getType(); 4213 Type Ty = NewAddr->getType();
4094 4214
4095 if (Ty == IceType_i64) { 4215 if (!Traits::Is64Bit && Ty == IceType_i64) {
4096 Value = legalizeUndef(Value); 4216 Value = legalizeUndef(Value);
4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4217 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4218 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4099 _store(ValueHi, 4219 _store(ValueHi,
4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); 4220 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4101 _store(ValueLo, 4221 _store(ValueLo,
4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); 4222 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4103 } else if (isVectorType(Ty)) { 4223 } else if (isVectorType(Ty)) {
4104 _storep(legalizeToReg(Value), NewAddr); 4224 _storep(legalizeToReg(Value), NewAddr);
4105 } else { 4225 } else {
(...skipping 27 matching lines...) Expand all
4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4253 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4134 Context.insert(NewStore); 4254 Context.insert(NewStore);
4135 } 4255 }
4136 } 4256 }
4137 4257
4138 template <class Machine> 4258 template <class Machine>
4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, 4259 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4140 uint64_t Min, uint64_t Max) { 4260 uint64_t Min, uint64_t Max) {
4141 // TODO(ascull): 64-bit should not reach here but only because it is not 4261 // TODO(ascull): 64-bit should not reach here but only because it is not
4142 // implemented yet. This should be able to handle the 64-bit case. 4262 // implemented yet. This should be able to handle the 64-bit case.
4143 assert(Comparison->getType() != IceType_i64); 4263 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
4144 // Subtracting 0 is a nop so don't do it 4264 // Subtracting 0 is a nop so don't do it
4145 if (Min != 0) { 4265 if (Min != 0) {
4146 // Avoid clobbering the comparison by copying it 4266 // Avoid clobbering the comparison by copying it
4147 Variable *T = nullptr; 4267 Variable *T = nullptr;
4148 _mov(T, Comparison); 4268 _mov(T, Comparison);
4149 _sub(T, Ctx->getConstantInt32(Min)); 4269 _sub(T, Ctx->getConstantInt32(Min));
4150 Comparison = T; 4270 Comparison = T;
4151 } 4271 }
4152 4272
4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); 4273 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
4232 4352
4233 template <class Machine> 4353 template <class Machine>
4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4354 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4235 // Group cases together and navigate through them with a binary search 4355 // Group cases together and navigate through them with a binary search
4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); 4356 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
4237 Operand *Src0 = Inst->getComparison(); 4357 Operand *Src0 = Inst->getComparison();
4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); 4358 CfgNode *DefaultTarget = Inst->getLabelDefault();
4239 4359
4240 assert(CaseClusters.size() != 0); // Should always be at least one 4360 assert(CaseClusters.size() != 0); // Should always be at least one
4241 4361
4242 if (Src0->getType() == IceType_i64) { 4362 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
4243 Src0 = legalize(Src0); // get Base/Index into physical registers 4363 Src0 = legalize(Src0); // get Base/Index into physical registers
4244 Operand *Src0Lo = loOperand(Src0); 4364 Operand *Src0Lo = loOperand(Src0);
4245 Operand *Src0Hi = hiOperand(Src0); 4365 Operand *Src0Hi = hiOperand(Src0);
4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { 4366 if (CaseClusters.back().getHigh() > UINT32_MAX) {
4247 // TODO(ascull): handle 64-bit case properly (currently naive version) 4367 // TODO(ascull): handle 64-bit case properly (currently naive version)
4248 // This might be handled by a higher level lowering of switches. 4368 // This might be handled by a higher level lowering of switches.
4249 SizeT NumCases = Inst->getNumCases(); 4369 SizeT NumCases = Inst->getNumCases();
4250 if (NumCases >= 2) { 4370 if (NumCases >= 2) {
4251 Src0Lo = legalizeToReg(Src0Lo); 4371 Src0Lo = legalizeToReg(Src0Lo);
4252 Src0Hi = legalizeToReg(Src0Hi); 4372 Src0Hi = legalizeToReg(Src0Hi);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4437 // that follows. This means that the original Store instruction is 4557 // that follows. This means that the original Store instruction is
4438 // still there, either because the value being stored is used beyond 4558 // still there, either because the value being stored is used beyond
4439 // the Store instruction, or because dead code elimination did not 4559 // the Store instruction, or because dead code elimination did not
4440 // happen. In either case, we cancel RMW lowering (and the caller 4560 // happen. In either case, we cancel RMW lowering (and the caller
4441 // deletes the RMW instruction). 4561 // deletes the RMW instruction).
4442 if (!RMW->isLastUse(RMW->getBeacon())) 4562 if (!RMW->isLastUse(RMW->getBeacon()))
4443 return; 4563 return;
4444 Operand *Src = RMW->getData(); 4564 Operand *Src = RMW->getData();
4445 Type Ty = Src->getType(); 4565 Type Ty = Src->getType();
4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4566 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4447 if (Ty == IceType_i64) { 4567 if (!Traits::Is64Bit && Ty == IceType_i64) {
4448 Src = legalizeUndef(Src); 4568 Src = legalizeUndef(Src);
4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4569 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4570 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4451 typename Traits::X86OperandMem *AddrLo = 4571 typename Traits::X86OperandMem *AddrLo =
4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); 4572 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4453 typename Traits::X86OperandMem *AddrHi = 4573 typename Traits::X86OperandMem *AddrHi =
4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); 4574 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4455 switch (RMW->getOp()) { 4575 switch (RMW->getOp()) {
4456 default: 4576 default:
4457 // TODO(stichnot): Implement other arithmetic operators. 4577 // TODO(stichnot): Implement other arithmetic operators.
(...skipping 13 matching lines...) Expand all
4471 case InstArithmetic::Or: 4591 case InstArithmetic::Or:
4472 _or_rmw(AddrLo, SrcLo); 4592 _or_rmw(AddrLo, SrcLo);
4473 _or_rmw(AddrHi, SrcHi); 4593 _or_rmw(AddrHi, SrcHi);
4474 return; 4594 return;
4475 case InstArithmetic::Xor: 4595 case InstArithmetic::Xor:
4476 _xor_rmw(AddrLo, SrcLo); 4596 _xor_rmw(AddrLo, SrcLo);
4477 _xor_rmw(AddrHi, SrcHi); 4597 _xor_rmw(AddrHi, SrcHi);
4478 return; 4598 return;
4479 } 4599 }
4480 } else { 4600 } else {
4481 // i8, i16, i32 4601 // x86-32: i8, i16, i32
4602 // x86-64: i8, i16, i32, i64
4482 switch (RMW->getOp()) { 4603 switch (RMW->getOp()) {
4483 default: 4604 default:
4484 // TODO(stichnot): Implement other arithmetic operators. 4605 // TODO(stichnot): Implement other arithmetic operators.
4485 break; 4606 break;
4486 case InstArithmetic::Add: 4607 case InstArithmetic::Add:
4487 Src = legalize(Src, Legal_Reg | Legal_Imm); 4608 Src = legalize(Src, Legal_Reg | Legal_Imm);
4488 _add_rmw(Addr, Src); 4609 _add_rmw(Addr, Src);
4489 return; 4610 return;
4490 case InstArithmetic::Sub: 4611 case InstArithmetic::Sub:
4491 Src = legalize(Src, Legal_Reg | Legal_Imm); 4612 Src = legalize(Src, Legal_Reg | Legal_Imm);
(...skipping 24 matching lines...) Expand all
4516 } else { 4637 } else {
4517 TargetLowering::lowerOther(Instr); 4638 TargetLowering::lowerOther(Instr);
4518 } 4639 }
4519 } 4640 }
4520 4641
4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4642 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4522 /// preserve integrity of liveness analysis. Undef values are also 4643 /// preserve integrity of liveness analysis. Undef values are also
4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4644 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4524 /// Undef input. 4645 /// Undef input.
4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4646 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4526 // Pause constant blinding or pooling, blinding or pooling will be done later 4647 if (Traits::Is64Bit) {
4527 // during phi lowering assignments 4648 // On x86-64 we don't need to prelower phis -- the architecture can handle
4649 // 64-bit integer natively.
4650 return;
4651 }
4652
4653 // Pause constant blinding or pooling, blinding or pooling will be done
4654 // later during phi lowering assignments
4528 BoolFlagSaver B(RandomizationPoolingPaused, true); 4655 BoolFlagSaver B(RandomizationPoolingPaused, true);
4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4656 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4530 this, Context.getNode(), Func); 4657 this, Context.getNode(), Func);
4531 } 4658 }
4532 4659
4533 // There is no support for loading or emitting vector constants, so the 4660 // There is no support for loading or emitting vector constants, so the
4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4661 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4535 // etc. are initialized with register operations. 4662 // etc. are initialized with register operations.
4536 // 4663 //
4537 // TODO(wala): Add limited support for vector constants so that 4664 // TODO(wala): Add limited support for vector constants so that
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { 4805 if (auto *Const = llvm::dyn_cast<Constant>(From)) {
4679 if (llvm::isa<ConstantUndef>(Const)) { 4806 if (llvm::isa<ConstantUndef>(Const)) {
4680 From = legalizeUndef(Const, RegNum); 4807 From = legalizeUndef(Const, RegNum);
4681 if (isVectorType(Ty)) 4808 if (isVectorType(Ty))
4682 return From; 4809 return From;
4683 Const = llvm::cast<Constant>(From); 4810 Const = llvm::cast<Constant>(From);
4684 } 4811 }
4685 // There should be no constants of vector type (other than undef). 4812 // There should be no constants of vector type (other than undef).
4686 assert(!isVectorType(Ty)); 4813 assert(!isVectorType(Ty));
4687 4814
4815 // If the operand is a 64 bit constant integer we need to legalize it to a
4816 // register in x86-64.
4817 if (Traits::Is64Bit) {
4818 if (llvm::isa<ConstantInteger64>(Const)) {
4819 Variable *V = copyToReg(Const, RegNum);
4820 V->setWeightInfinite();
4821 return V;
4822 }
4823 }
4824
4688 // If the operand is an 32 bit constant integer, we should check 4825 // If the operand is an 32 bit constant integer, we should check
4689 // whether we need to randomize it or pool it. 4826 // whether we need to randomize it or pool it.
4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { 4827 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); 4828 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
4692 if (NewConst != Const) { 4829 if (NewConst != Const) {
4693 return NewConst; 4830 return NewConst;
4694 } 4831 }
4695 } 4832 }
4696 4833
4697 // Convert a scalar floating point constant into an explicit 4834 // Convert a scalar floating point constant into an explicit
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
4815 } 4952 }
4816 // Do legalization, which contains randomization/pooling 4953 // Do legalization, which contains randomization/pooling
4817 // or do randomization/pooling. 4954 // or do randomization/pooling.
4818 return llvm::cast<typename Traits::X86OperandMem>( 4955 return llvm::cast<typename Traits::X86OperandMem>(
4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 4956 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
4820 } 4957 }
4821 4958
4822 template <class Machine> 4959 template <class Machine>
4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 4960 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
4824 // There aren't any 64-bit integer registers for x86-32. 4961 // There aren't any 64-bit integer registers for x86-32.
4825 assert(Type != IceType_i64); 4962 assert(Traits::Is64Bit || Type != IceType_i64);
4826 Variable *Reg = Func->makeVariable(Type); 4963 Variable *Reg = Func->makeVariable(Type);
4827 if (RegNum == Variable::NoRegister) 4964 if (RegNum == Variable::NoRegister)
4828 Reg->setWeightInfinite(); 4965 Reg->setWeightInfinite();
4829 else 4966 else
4830 Reg->setRegNum(RegNum); 4967 Reg->setRegNum(RegNum);
4831 return Reg; 4968 return Reg;
4832 } 4969 }
4833 4970
4834 template <class Machine> void TargetX86Base<Machine>::postLower() { 4971 template <class Machine> void TargetX86Base<Machine>::postLower() {
4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) 4972 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 11 matching lines...) Expand all
4847 4984
4848 template <class Machine> 4985 template <class Machine>
4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 4986 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
4850 if (!BuildDefs::dump()) 4987 if (!BuildDefs::dump())
4851 return; 4988 return;
4852 Ostream &Str = Ctx->getStrEmit(); 4989 Ostream &Str = Ctx->getStrEmit();
4853 Str << getConstantPrefix() << C->getValue(); 4990 Str << getConstantPrefix() << C->getValue();
4854 } 4991 }
4855 4992
4856 template <class Machine> 4993 template <class Machine>
4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { 4994 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 4995 if (!Traits::Is64Bit) {
4996 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
4997 } else {
4998 if (!BuildDefs::dump())
4999 return;
5000 Ostream &Str = Ctx->getStrEmit();
5001 Str << getConstantPrefix() << C->getValue();
5002 }
4859 } 5003 }
4860 5004
4861 template <class Machine> 5005 template <class Machine>
4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { 5006 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
4863 if (!BuildDefs::dump()) 5007 if (!BuildDefs::dump())
4864 return; 5008 return;
4865 Ostream &Str = Ctx->getStrEmit(); 5009 Ostream &Str = Ctx->getStrEmit();
4866 C->emitPoolLabel(Str); 5010 C->emitPoolLabel(Str);
4867 } 5011 }
4868 5012
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4993 Constant *Mask1 = Ctx->getConstantInt( 5137 Constant *Mask1 = Ctx->getConstantInt(
4994 MemOperand->getOffset()->getType(), Cookie + Value); 5138 MemOperand->getOffset()->getType(), Cookie + Value);
4995 Constant *Mask2 = 5139 Constant *Mask2 =
4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5140 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
4997 5141
4998 typename Traits::X86OperandMem *TempMemOperand = 5142 typename Traits::X86OperandMem *TempMemOperand =
4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), 5143 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5000 MemOperand->getBase(), Mask1); 5144 MemOperand->getBase(), Mask1);
5001 // If we have already assigned a physical register, we must come from 5145 // If we have already assigned a physical register, we must come from
5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5146 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
 5003 // the assigned register as this assignment is the start of its use-def 5147 // the assigned register as this assignment is the start of its
 5004 // chain. So we add RegNum argument here. 5148 // use-def chain. So we add RegNum argument here.
5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5149 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5006 _lea(RegTemp, TempMemOperand); 5150 _lea(RegTemp, TempMemOperand);
5007 // As source operand doesn't use the dstreg, we don't need to add 5151 // As source operand doesn't use the dstreg, we don't need to add
5008 // _set_dest_nonkillable(). 5152 // _set_dest_nonkillable().
5009 // But if we use the same Dest Reg, that is, with RegNum 5153 // But if we use the same Dest Reg, that is, with RegNum
5010 // assigned, we should add this _set_dest_nonkillable() 5154 // assigned, we should add this _set_dest_nonkillable()
5011 if (RegNum != Variable::NoRegister) 5155 if (RegNum != Variable::NoRegister)
5012 _set_dest_nonkillable(); 5156 _set_dest_nonkillable();
5013 5157
5014 typename Traits::X86OperandMem *NewMemOperand = 5158 typename Traits::X86OperandMem *NewMemOperand =
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
5077 } 5221 }
5078 // the offset is not eligible for blinding or pooling, return the original 5222 // the offset is not eligible for blinding or pooling, return the original
5079 // mem operand 5223 // mem operand
5080 return MemOperand; 5224 return MemOperand;
5081 } 5225 }
5082 5226
5083 } // end of namespace X86Internal 5227 } // end of namespace X86Internal
5084 } // end of namespace Ice 5228 } // end of namespace Ice
5085 5229
5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5230 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | unittest/AssemblerX8632/DataMov.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698