Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1273153002: Subzero. Native 64-bit int arithmetic on x86-64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Removes the x8664-specific xtest target. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 }; 113 };
114 114
115 template <class MachineTraits> 115 template <class MachineTraits>
116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
118 118
119 template <class MachineTraits> 119 template <class MachineTraits>
120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
122 if (llvm::isa<InstIcmp>(Instr)) { 122 if (llvm::isa<InstIcmp>(Instr)) {
123 if (Instr->getSrc(0)->getType() != IceType_i64) 123 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
124 return PK_Icmp32; 124 return PK_Icmp32;
Jim Stichnoth 2015/08/10 19:39:20 Probably need to rename PK_Icmp32 to something ref
John 2015/08/10 20:41:17 TODO'ed.
Jim Stichnoth 2015/08/11 16:01:36 TODO'ed, dude? TODONE.
125 return PK_None; // TODO(stichnot): actually PK_Icmp64; 125 return PK_None; // TODO(stichnot): actually PK_Icmp64;
126 } 126 }
127 return PK_None; // TODO(stichnot): remove this 127 return PK_None; // TODO(stichnot): remove this
128 128
129 if (llvm::isa<InstFcmp>(Instr)) 129 if (llvm::isa<InstFcmp>(Instr))
130 return PK_Fcmp; 130 return PK_Fcmp;
131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
132 switch (Cast->getCastKind()) { 132 switch (Cast->getCastKind()) {
133 default: 133 default:
134 return PK_None; 134 return PK_None;
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after
636 // instruction or equivalent. 636 // instruction or equivalent.
637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
638 // An InstLoad always qualifies. 638 // An InstLoad always qualifies.
639 LoadDest = Load->getDest(); 639 LoadDest = Load->getDest();
640 const bool DoLegalize = false; 640 const bool DoLegalize = false;
641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 641 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
642 LoadDest->getType(), DoLegalize); 642 LoadDest->getType(), DoLegalize);
643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
644 // An AtomicLoad intrinsic qualifies as long as it has a valid 644 // An AtomicLoad intrinsic qualifies as long as it has a valid
645 // memory ordering, and can be implemented in a single 645 // memory ordering, and can be implemented in a single
646 // instruction (i.e., not i64). 646 // instruction (i.e., not i64 on x86-32).
647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
648 if (ID == Intrinsics::AtomicLoad && 648 if (ID == Intrinsics::AtomicLoad &&
649 Intrin->getDest()->getType() != IceType_i64 && 649 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
650 Intrinsics::isMemoryOrderValid( 650 Intrinsics::isMemoryOrderValid(
651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
652 LoadDest = Intrin->getDest(); 652 LoadDest = Intrin->getDest();
653 const bool DoLegalize = false; 653 const bool DoLegalize = false;
654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
655 DoLegalize); 655 DoLegalize);
656 } 656 }
657 } 657 }
658 // A Load instruction can be folded into the following 658 // A Load instruction can be folded into the following
659 // instruction only if the following instruction ends the Load's 659 // instruction only if the following instruction ends the Load's
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
717 template <class Machine> 717 template <class Machine>
718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
720 return Br->optimizeBranch(NextNode); 720 return Br->optimizeBranch(NextNode);
721 } 721 }
722 return false; 722 return false;
723 } 723 }
724 724
725 template <class Machine> 725 template <class Machine>
726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
727 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
728 if (RegNum == Traits::RegisterSet::Reg_esp ||
729 RegNum == Traits::RegisterSet::Reg_ebp)
730 Ty = Traits::WordType;
727 if (Ty == IceType_void) 731 if (Ty == IceType_void)
728 Ty = IceType_i32; 732 Ty = IceType_i32;
729 if (PhysicalRegisters[Ty].empty()) 733 if (PhysicalRegisters[Ty].empty())
730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 734 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
731 assert(RegNum < PhysicalRegisters[Ty].size()); 735 assert(RegNum < PhysicalRegisters[Ty].size());
732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 736 Variable *Reg = PhysicalRegisters[Ty][RegNum];
733 if (Reg == nullptr) { 737 if (Reg == nullptr) {
734 Reg = Func->makeVariable(Ty); 738 Reg = Func->makeVariable(Ty);
735 Reg->setRegNum(RegNum); 739 Reg->setRegNum(RegNum);
736 PhysicalRegisters[Ty][RegNum] = Reg; 740 PhysicalRegisters[Ty][RegNum] = Reg;
(...skipping 26 matching lines...) Expand all
763 } 767 }
764 int32_t Offset = Var->getStackOffset(); 768 int32_t Offset = Var->getStackOffset();
765 int32_t BaseRegNum = Var->getBaseRegNum(); 769 int32_t BaseRegNum = Var->getBaseRegNum();
766 if (BaseRegNum == Variable::NoRegister) { 770 if (BaseRegNum == Variable::NoRegister) {
767 BaseRegNum = getFrameOrStackReg(); 771 BaseRegNum = getFrameOrStackReg();
768 if (!hasFramePointer()) 772 if (!hasFramePointer())
769 Offset += getStackAdjustment(); 773 Offset += getStackAdjustment();
770 } 774 }
771 if (Offset) 775 if (Offset)
772 Str << Offset; 776 Str << Offset;
773 const Type FrameSPTy = IceType_i32; 777 const Type FrameSPTy = Traits::WordType;
774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 778 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
775 } 779 }
776 780
777 template <class Machine> 781 template <class Machine>
778 typename TargetX86Base<Machine>::Traits::Address 782 typename TargetX86Base<Machine>::Traits::Address
779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 783 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
780 if (Var->hasReg()) 784 if (Var->hasReg())
781 llvm_unreachable("Stack Variable has a register assigned"); 785 llvm_unreachable("Stack Variable has a register assigned");
782 if (Var->getWeight().isInf()) { 786 if (Var->getWeight().isInf()) {
783 llvm_unreachable("Infinite-weight Variable has no register assigned"); 787 llvm_unreachable("Infinite-weight Variable has no register assigned");
(...skipping 19 matching lines...) Expand all
803 /// function generates an instruction to copy Arg into its assigned 807 /// function generates an instruction to copy Arg into its assigned
804 /// register if applicable. 808 /// register if applicable.
805 template <class Machine> 809 template <class Machine>
806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 810 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
807 Variable *FramePtr, 811 Variable *FramePtr,
808 size_t BasicFrameOffset, 812 size_t BasicFrameOffset,
809 size_t &InArgsSizeBytes) { 813 size_t &InArgsSizeBytes) {
810 Variable *Lo = Arg->getLo(); 814 Variable *Lo = Arg->getLo();
811 Variable *Hi = Arg->getHi(); 815 Variable *Hi = Arg->getHi();
812 Type Ty = Arg->getType(); 816 Type Ty = Arg->getType();
813 if (Lo && Hi && Ty == IceType_i64) { 817 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
814 // TODO(jpp): This special case is not needed for x86-64.
815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 818 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 819 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 820 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 821 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
819 return; 822 return;
820 } 823 }
821 if (isVectorType(Ty)) { 824 if (isVectorType(Ty)) {
822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 825 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
823 } 826 }
824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 827 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 828 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
826 if (Arg->hasReg()) { 829 if (Arg->hasReg()) {
827 assert(Ty != IceType_i64); 830 assert(Ty != IceType_i64 || Traits::Is64Bit);
828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 831 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 832 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
830 if (isVectorType(Arg->getType())) { 833 if (isVectorType(Arg->getType())) {
831 _movp(Arg, Mem); 834 _movp(Arg, Mem);
832 } else { 835 } else {
833 _mov(Arg, Mem); 836 _mov(Arg, Mem);
834 } 837 }
835 // This argument-copying instruction uses an explicit Traits::X86OperandMem 838 // This argument-copying instruction uses an explicit Traits::X86OperandMem
836 // operand instead of a Variable, so its fill-from-stack operation has to be 839 // operand instead of a Variable, so its fill-from-stack operation has to be
837 // tracked separately for statistics. 840 // tracked separately for statistics.
838 Ctx->statsUpdateFills(); 841 Ctx->statsUpdateFills();
839 } 842 }
840 } 843 }
841 844
842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 845 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
843 // TODO(jpp): this is wrong for x86-64. 846 return Traits::WordType;
844 return IceType_i32;
845 } 847 }
846 848
847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { 849 template <class Machine>
850 template <typename T>
851 typename std::enable_if<!T::Is64Bit, void>::type
852 TargetX86Base<Machine>::split64(Variable *Var) {
848 switch (Var->getType()) { 853 switch (Var->getType()) {
849 default: 854 default:
850 return; 855 return;
851 case IceType_i64: 856 case IceType_i64:
852 // TODO: Only consider F64 if we need to push each half when 857 // TODO: Only consider F64 if we need to push each half when
853 // passing as an argument to a function call. Note that each half 858 // passing as an argument to a function call. Note that each half
854 // is still typed as I32. 859 // is still typed as I32.
855 case IceType_f64: 860 case IceType_f64:
856 break; 861 break;
857 } 862 }
(...skipping 11 matching lines...) Expand all
869 Hi->setName(Func, Var->getName(Func) + "__hi"); 874 Hi->setName(Func, Var->getName(Func) + "__hi");
870 } 875 }
871 Var->setLoHi(Lo, Hi); 876 Var->setLoHi(Lo, Hi);
872 if (Var->getIsArg()) { 877 if (Var->getIsArg()) {
873 Lo->setIsArg(); 878 Lo->setIsArg();
874 Hi->setIsArg(); 879 Hi->setIsArg();
875 } 880 }
876 } 881 }
877 882
878 template <class Machine> 883 template <class Machine>
879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { 884 template <typename T>
885 typename std::enable_if<!T::Is64Bit, Operand>::type *
886 TargetX86Base<Machine>::loOperand(Operand *Operand) {
880 assert(Operand->getType() == IceType_i64 || 887 assert(Operand->getType() == IceType_i64 ||
881 Operand->getType() == IceType_f64); 888 Operand->getType() == IceType_f64);
882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 889 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
883 return Operand; 890 return Operand;
884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 891 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
885 split64(Var); 892 split64(Var);
886 return Var->getLo(); 893 return Var->getLo();
887 } 894 }
888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 895 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 896 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 897 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
891 // Check if we need to blind/pool the constant. 898 // Check if we need to blind/pool the constant.
892 return legalize(ConstInt); 899 return legalize(ConstInt);
893 } 900 }
894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 901 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
895 auto *MemOperand = Traits::X86OperandMem::create( 902 auto *MemOperand = Traits::X86OperandMem::create(
896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 903 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
897 Mem->getShift(), Mem->getSegmentRegister()); 904 Mem->getShift(), Mem->getSegmentRegister());
898 // Test if we should randomize or pool the offset, if so randomize it or 905 // Test if we should randomize or pool the offset, if so randomize it or
899 // pool it then create mem operand with the blinded/pooled constant. 906 // pool it then create mem operand with the blinded/pooled constant.
900 // Otherwise, return the mem operand as ordinary mem operand. 907 // Otherwise, return the mem operand as ordinary mem operand.
901 return legalize(MemOperand); 908 return legalize(MemOperand);
902 } 909 }
903 llvm_unreachable("Unsupported operand type"); 910 llvm_unreachable("Unsupported operand type");
904 return nullptr; 911 return nullptr;
905 } 912 }
906 913
907 template <class Machine> 914 template <class Machine>
908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 915 template <typename T>
916 typename std::enable_if<!T::Is64Bit, Operand>::type *
917 TargetX86Base<Machine>::hiOperand(Operand *Operand) {
909 assert(Operand->getType() == IceType_i64 || 918 assert(Operand->getType() == IceType_i64 ||
910 Operand->getType() == IceType_f64); 919 Operand->getType() == IceType_f64);
911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 920 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
912 return Operand; 921 return Operand;
913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 922 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
914 split64(Var); 923 split64(Var);
915 return Var->getHi(); 924 return Var->getHi();
916 } 925 }
917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 926 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 927 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1109 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1101 Variable *Dest = Inst->getDest(); 1110 Variable *Dest = Inst->getDest();
1102 Operand *Src0 = legalize(Inst->getSrc(0)); 1111 Operand *Src0 = legalize(Inst->getSrc(0));
1103 Operand *Src1 = legalize(Inst->getSrc(1)); 1112 Operand *Src1 = legalize(Inst->getSrc(1));
1104 if (Inst->isCommutative()) { 1113 if (Inst->isCommutative()) {
1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1114 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1106 std::swap(Src0, Src1); 1115 std::swap(Src0, Src1);
1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1116 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1108 std::swap(Src0, Src1); 1117 std::swap(Src0, Src1);
1109 } 1118 }
1110 if (Dest->getType() == IceType_i64) { 1119 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1111 // These helper-call-involved instructions are lowered in this 1120 // These x86-32 helper-call-involved instructions are lowered in this
1112 // separate switch. This is because loOperand() and hiOperand() 1121 // separate switch. This is because loOperand() and hiOperand()
1113 // may insert redundant instructions for constant blinding and 1122 // may insert redundant instructions for constant blinding and
1114 // pooling. Such redundant instructions will fail liveness analysis 1123 // pooling. Such redundant instructions will fail liveness analysis
1115 // under -Om1 setting. And, actually these arguments do not need 1124 // under -Om1 setting. And, actually these arguments do not need
1116 // to be processed with loOperand() and hiOperand() to be used. 1125 // to be processed with loOperand() and hiOperand() to be used.
1117 switch (Inst->getOp()) { 1126 switch (Inst->getOp()) {
1118 case InstArithmetic::Udiv: { 1127 case InstArithmetic::Udiv: {
1119 const SizeT MaxSrcs = 2; 1128 const SizeT MaxSrcs = 2;
1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1129 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1121 Call->addArg(Inst->getSrc(0)); 1130 Call->addArg(Inst->getSrc(0));
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after
1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1658 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1650 // this ever becomes a problem we can introduce a pseudo rem instruction 1659 // this ever becomes a problem we can introduce a pseudo rem instruction
1651 // that returns the remainder in %al directly (and uses a mov for copying 1660 // that returns the remainder in %al directly (and uses a mov for copying
1652 // %ah to %al.) 1661 // %ah to %al.)
1653 static constexpr uint8_t AlSizeInBits = 8; 1662 static constexpr uint8_t AlSizeInBits = 8;
1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1663 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1655 _mov(Dest, T); 1664 _mov(Dest, T);
1656 Context.insert(InstFakeUse::create(Func, T_eax)); 1665 Context.insert(InstFakeUse::create(Func, T_eax));
1657 } else { 1666 } else {
1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1667 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1668 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1669 _mov(T_edx, Zero);
1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1670 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1661 _div(T_edx, Src1, T); 1671 _div(T_edx, Src1, T);
1662 _mov(Dest, T_edx); 1672 _mov(Dest, T_edx);
1663 } 1673 }
1664 break; 1674 break;
1665 case InstArithmetic::Srem: 1675 case InstArithmetic::Srem:
1666 // TODO(stichnot): Enable this after doing better performance 1676 // TODO(stichnot): Enable this after doing better performance
1667 // and cross testing. 1677 // and cross testing.
1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1678 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1669 // Optimize mod by constant power of 2, but not for Om1 or O0, 1679 // Optimize mod by constant power of 2, but not for Om1 or O0,
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1724 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1725 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1716 // this ever becomes a problem we can introduce a pseudo rem instruction 1726 // this ever becomes a problem we can introduce a pseudo rem instruction
1717 // that returns the remainder in %al directly (and uses a mov for copying 1727 // that returns the remainder in %al directly (and uses a mov for copying
1718 // %ah to %al.) 1728 // %ah to %al.)
1719 static constexpr uint8_t AlSizeInBits = 8; 1729 static constexpr uint8_t AlSizeInBits = 8;
1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1730 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1721 _mov(Dest, T); 1731 _mov(Dest, T);
1722 Context.insert(InstFakeUse::create(Func, T_eax)); 1732 Context.insert(InstFakeUse::create(Func, T_eax));
1723 } else { 1733 } else {
1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1734 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1735 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1726 _cbwdq(T_edx, T); 1736 _cbwdq(T_edx, T);
1727 _idiv(T_edx, Src1, T); 1737 _idiv(T_edx, Src1, T);
1728 _mov(Dest, T_edx); 1738 _mov(Dest, T_edx);
1729 } 1739 }
1730 break; 1740 break;
1731 case InstArithmetic::Fadd: 1741 case InstArithmetic::Fadd:
1732 _mov(T, Src0); 1742 _mov(T, Src0);
1733 _addss(T, Src1); 1743 _addss(T, Src1);
1734 _mov(Dest, T); 1744 _mov(Dest, T);
(...skipping 23 matching lines...) Expand all
1758 return lowerCall(Call); 1768 return lowerCall(Call);
1759 } 1769 }
1760 } 1770 }
1761 } 1771 }
1762 1772
1763 template <class Machine> 1773 template <class Machine>
1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1774 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1765 Variable *Dest = Inst->getDest(); 1775 Variable *Dest = Inst->getDest();
1766 Operand *Src0 = Inst->getSrc(0); 1776 Operand *Src0 = Inst->getSrc(0);
1767 assert(Dest->getType() == Src0->getType()); 1777 assert(Dest->getType() == Src0->getType());
1768 if (Dest->getType() == IceType_i64) { 1778 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1769 Src0 = legalize(Src0); 1779 Src0 = legalize(Src0);
1770 Operand *Src0Lo = loOperand(Src0); 1780 Operand *Src0Lo = loOperand(Src0);
1771 Operand *Src0Hi = hiOperand(Src0); 1781 Operand *Src0Hi = hiOperand(Src0);
1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1782 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1783 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1784 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1775 _mov(T_Lo, Src0Lo); 1785 _mov(T_Lo, Src0Lo);
1776 _mov(DestLo, T_Lo); 1786 _mov(DestLo, T_Lo);
1777 _mov(T_Hi, Src0Hi); 1787 _mov(T_Hi, Src0Hi);
1778 _mov(DestHi, T_Hi); 1788 _mov(DestHi, T_Hi);
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
1863 SizeT ShiftAmount = 1873 SizeT ShiftAmount =
1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1874 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
1865 1; 1875 1;
1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1876 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1867 Variable *T = makeReg(DestTy); 1877 Variable *T = makeReg(DestTy);
1868 _movp(T, Src0RM); 1878 _movp(T, Src0RM);
1869 _psll(T, ShiftConstant); 1879 _psll(T, ShiftConstant);
1870 _psra(T, ShiftConstant); 1880 _psra(T, ShiftConstant);
1871 _movp(Dest, T); 1881 _movp(Dest, T);
1872 } 1882 }
1873 } else if (Dest->getType() == IceType_i64) { 1883 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1884 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1875 Constant *Shift = Ctx->getConstantInt32(31); 1885 Constant *Shift = Ctx->getConstantInt32(31);
1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1886 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1887 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1878 Variable *T_Lo = makeReg(DestLo->getType()); 1888 Variable *T_Lo = makeReg(DestLo->getType());
1879 if (Src0RM->getType() == IceType_i32) { 1889 if (Src0RM->getType() == IceType_i32) {
1880 _mov(T_Lo, Src0RM); 1890 _mov(T_Lo, Src0RM);
1881 } else if (Src0RM->getType() == IceType_i1) { 1891 } else if (Src0RM->getType() == IceType_i1) {
1882 _movzx(T_Lo, Src0RM); 1892 _movzx(T_Lo, Src0RM);
1883 _shl(T_Lo, Shift); 1893 _shl(T_Lo, Shift);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1923 case InstCast::Zext: { 1933 case InstCast::Zext: {
1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1934 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1925 if (isVectorType(Dest->getType())) { 1935 if (isVectorType(Dest->getType())) {
1926 // onemask = materialize(1,1,...); dest = onemask & src 1936 // onemask = materialize(1,1,...); dest = onemask & src
1927 Type DestTy = Dest->getType(); 1937 Type DestTy = Dest->getType();
1928 Variable *OneMask = makeVectorOfOnes(DestTy); 1938 Variable *OneMask = makeVectorOfOnes(DestTy);
1929 Variable *T = makeReg(DestTy); 1939 Variable *T = makeReg(DestTy);
1930 _movp(T, Src0RM); 1940 _movp(T, Src0RM);
1931 _pand(T, OneMask); 1941 _pand(T, OneMask);
1932 _movp(Dest, T); 1942 _movp(Dest, T);
1933 } else if (Dest->getType() == IceType_i64) { 1943 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1934 // t1=movzx src; dst.lo=t1; dst.hi=0 1944 // t1=movzx src; dst.lo=t1; dst.hi=0
1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1945 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1946 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1947 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1938 Variable *Tmp = makeReg(DestLo->getType()); 1948 Variable *Tmp = makeReg(DestLo->getType());
1939 if (Src0RM->getType() == IceType_i32) { 1949 if (Src0RM->getType() == IceType_i32) {
1940 _mov(Tmp, Src0RM); 1950 _mov(Tmp, Src0RM);
1941 } else { 1951 } else {
1942 _movzx(Tmp, Src0RM); 1952 _movzx(Tmp, Src0RM);
1943 } 1953 }
1944 if (Src0RM->getType() == IceType_i1) { 1954 if (Src0RM->getType() == IceType_i1) {
1945 Constant *One = Ctx->getConstantInt32(1); 1955 Constant *One = Ctx->getConstantInt32(1);
1946 _and(Tmp, One); 1956 _and(Tmp, One);
1947 } 1957 }
1948 _mov(DestLo, Tmp); 1958 _mov(DestLo, Tmp);
1949 _mov(DestHi, Zero); 1959 _mov(DestHi, Zero);
1950 } else if (Src0RM->getType() == IceType_i1) { 1960 } else if (Src0RM->getType() == IceType_i1) {
1951 // t = Src0RM; t &= 1; Dest = t 1961 // t = Src0RM; t &= 1; Dest = t
1952 Constant *One = Ctx->getConstantInt32(1); 1962 Constant *One = Ctx->getConstantInt32(1);
1953 Type DestTy = Dest->getType(); 1963 Type DestTy = Dest->getType();
1954 Variable *T; 1964 Variable *T;
1955 if (DestTy == IceType_i8) { 1965 T = makeReg(IceType_i32);
1956 T = makeReg(DestTy); 1966 _mov(T, Src0RM);
1957 _mov(T, Src0RM); 1967 _and(T, One);
1958 } else { 1968 if (!Traits::Is64Bit) {
1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 1969 assert(DestTy != IceType_i64);
1960 T = makeReg(IceType_i32); 1970 } else if (DestTy == IceType_i64) {
1961 _movzx(T, Src0RM); 1971 // In x86-64 we should be able to rely on mov reg, reg to zero extend T
1972 // into Dest. At this point we can't ensure Dest will live in a
1973 // register. Therefore, we use _movzx, which the assembler rightly
1974 // converts to a 32-bit mov. A new temporary is created because the
1975 // assembler does not know how to movzx to a memory location.
1976 Variable *T_1 = makeReg(IceType_i64);
1977 _movzx(T_1, T);
1978 T = T_1;
1962 } 1979 }
1963 _and(T, One);
1964 _mov(Dest, T); 1980 _mov(Dest, T);
1965 } else { 1981 } else {
1966 // t1 = movzx src; dst = t1 1982 // t1 = movzx src; dst = t1
1967 Variable *T = makeReg(Dest->getType()); 1983 Variable *T = makeReg(Dest->getType());
1968 _movzx(T, Src0RM); 1984 _movzx(T, Src0RM);
1969 _mov(Dest, T); 1985 _mov(Dest, T);
1970 } 1986 }
1971 break; 1987 break;
1972 } 1988 }
1973 case InstCast::Trunc: { 1989 case InstCast::Trunc: {
1974 if (isVectorType(Dest->getType())) { 1990 if (isVectorType(Dest->getType())) {
1975 // onemask = materialize(1,1,...); dst = src & onemask 1991 // onemask = materialize(1,1,...); dst = src & onemask
1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1992 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1977 Type Src0Ty = Src0RM->getType(); 1993 Type Src0Ty = Src0RM->getType();
1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); 1994 Variable *OneMask = makeVectorOfOnes(Src0Ty);
1979 Variable *T = makeReg(Dest->getType()); 1995 Variable *T = makeReg(Dest->getType());
1980 _movp(T, Src0RM); 1996 _movp(T, Src0RM);
1981 _pand(T, OneMask); 1997 _pand(T, OneMask);
1982 _movp(Dest, T); 1998 _movp(Dest, T);
1983 } else { 1999 } else {
1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2000 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1985 if (Src0->getType() == IceType_i64) 2001 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
1986 Src0 = loOperand(Src0); 2002 Src0 = loOperand(Src0);
1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2003 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
1988 // t1 = trunc Src0RM; Dest = t1 2004 // t1 = trunc Src0RM; Dest = t1
1989 Variable *T = nullptr; 2005 Variable *T = nullptr;
1990 _mov(T, Src0RM); 2006 _mov(T, Src0RM);
1991 if (Dest->getType() == IceType_i1) 2007 if (Dest->getType() == IceType_i1)
1992 _and(T, Ctx->getConstantInt1(1)); 2008 _and(T, Ctx->getConstantInt1(1));
1993 _mov(Dest, T); 2009 _mov(Dest, T);
1994 } 2010 }
1995 break; 2011 break;
(...skipping 10 matching lines...) Expand all
2006 case InstCast::Fptosi: 2022 case InstCast::Fptosi:
2007 if (isVectorType(Dest->getType())) { 2023 if (isVectorType(Dest->getType())) {
2008 assert(Dest->getType() == IceType_v4i32 && 2024 assert(Dest->getType() == IceType_v4i32 &&
2009 Inst->getSrc(0)->getType() == IceType_v4f32); 2025 Inst->getSrc(0)->getType() == IceType_v4f32);
2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2026 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2027 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2012 Src0RM = legalizeToReg(Src0RM); 2028 Src0RM = legalizeToReg(Src0RM);
2013 Variable *T = makeReg(Dest->getType()); 2029 Variable *T = makeReg(Dest->getType());
2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2030 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2015 _movp(Dest, T); 2031 _movp(Dest, T);
2016 } else if (Dest->getType() == IceType_i64) { 2032 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2017 // Use a helper for converting floating-point values to 64-bit 2033 // Use a helper for converting floating-point values to 64-bit
2018 // integers. SSE2 appears to have no way to convert from xmm 2034 // integers. SSE2 appears to have no way to convert from xmm
2019 // registers to something like the edx:eax register pair, and 2035 // registers to something like the edx:eax register pair, and
2020 // gcc and clang both want to use x87 instructions complete with 2036 // gcc and clang both want to use x87 instructions complete with
2021 // temporary manipulation of the status word. This helper is 2037 // temporary manipulation of the status word. This helper is
2022 // not needed for x86-64. 2038 // not needed for x86-64.
2023 split64(Dest); 2039 split64(Dest);
2024 const SizeT MaxSrcs = 1; 2040 const SizeT MaxSrcs = 1;
2025 Type SrcType = Inst->getSrc(0)->getType(); 2041 Type SrcType = Inst->getSrc(0)->getType();
2026 InstCall *Call = 2042 InstCall *Call =
2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2043 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2028 : H_fptosi_f64_i64, 2044 : H_fptosi_f64_i64,
2029 Dest, MaxSrcs); 2045 Dest, MaxSrcs);
2030 Call->addArg(Inst->getSrc(0)); 2046 Call->addArg(Inst->getSrc(0));
2031 lowerCall(Call); 2047 lowerCall(Call);
2032 } else { 2048 } else {
2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2049 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2050 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2035 Variable *T_1 = makeReg(IceType_i32); 2051 Variable *T_1 = nullptr;
2052 if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
2053 T_1 = makeReg(IceType_i64);
2054 } else {
2055 assert(Dest->getType() != IceType_i64);
2056 T_1 = makeReg(IceType_i32);
2057 }
2058 // cvt() requires its integer argument to be a GPR.
2059 T_1->setWeightInfinite();
2036 Variable *T_2 = makeReg(Dest->getType()); 2060 Variable *T_2 = makeReg(Dest->getType());
2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2061 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2062 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2039 if (Dest->getType() == IceType_i1) 2063 if (Dest->getType() == IceType_i1)
2040 _and(T_2, Ctx->getConstantInt1(1)); 2064 _and(T_2, Ctx->getConstantInt1(1));
2041 _mov(Dest, T_2); 2065 _mov(Dest, T_2);
2042 } 2066 }
2043 break; 2067 break;
2044 case InstCast::Fptoui: 2068 case InstCast::Fptoui:
2045 if (isVectorType(Dest->getType())) { 2069 if (isVectorType(Dest->getType())) {
2046 assert(Dest->getType() == IceType_v4i32 && 2070 assert(Dest->getType() == IceType_v4i32 &&
2047 Inst->getSrc(0)->getType() == IceType_v4f32); 2071 Inst->getSrc(0)->getType() == IceType_v4f32);
2048 const SizeT MaxSrcs = 1; 2072 const SizeT MaxSrcs = 1;
2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2073 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2050 Call->addArg(Inst->getSrc(0)); 2074 Call->addArg(Inst->getSrc(0));
2051 lowerCall(Call); 2075 lowerCall(Call);
2052 } else if (Dest->getType() == IceType_i64 || 2076 } else if (Dest->getType() == IceType_i64 ||
2053 Dest->getType() == IceType_i32) { 2077 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2054 // Use a helper for both x86-32 and x86-64. 2078 // Use a helper for both x86-32 and x86-64.
2055 split64(Dest); 2079 if (!Traits::Is64Bit)
2080 split64(Dest);
2056 const SizeT MaxSrcs = 1; 2081 const SizeT MaxSrcs = 1;
2057 Type DestType = Dest->getType(); 2082 Type DestType = Dest->getType();
2058 Type SrcType = Inst->getSrc(0)->getType(); 2083 Type SrcType = Inst->getSrc(0)->getType();
2059 IceString TargetString; 2084 IceString TargetString;
2060 if (isInt32Asserting32Or64(DestType)) { 2085 if (Traits::Is64Bit) {
2086 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
Jim Stichnoth 2015/08/10 19:39:20 Does it make sense to combine the first and third
John 2015/08/10 20:41:17 I would rather not mix the cases. This is more exp
2087 : H_fptoui_f64_i64;
2088 } else if (isInt32Asserting32Or64(DestType)) {
2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2089 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2062 : H_fptoui_f64_i32; 2090 : H_fptoui_f64_i32;
2063 } else { 2091 } else {
2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2092 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2065 : H_fptoui_f64_i64; 2093 : H_fptoui_f64_i64;
2066 } 2094 }
2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2095 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2068 Call->addArg(Inst->getSrc(0)); 2096 Call->addArg(Inst->getSrc(0));
2069 lowerCall(Call); 2097 lowerCall(Call);
2070 return; 2098 return;
2071 } else { 2099 } else {
2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2100 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2101 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2074 Variable *T_1 = makeReg(IceType_i32); 2102 assert(Dest->getType() != IceType_i64);
2103 Variable *T_1 = nullptr;
2104 if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
2105 T_1 = makeReg(IceType_i64);
2106 } else {
2107 assert(Dest->getType() != IceType_i32);
2108 T_1 = makeReg(IceType_i32);
2109 }
2110 T_1->setWeightInfinite();
2075 Variable *T_2 = makeReg(Dest->getType()); 2111 Variable *T_2 = makeReg(Dest->getType());
2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2112 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2113 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2078 if (Dest->getType() == IceType_i1) 2114 if (Dest->getType() == IceType_i1)
2079 _and(T_2, Ctx->getConstantInt1(1)); 2115 _and(T_2, Ctx->getConstantInt1(1));
2080 _mov(Dest, T_2); 2116 _mov(Dest, T_2);
2081 } 2117 }
2082 break; 2118 break;
2083 case InstCast::Sitofp: 2119 case InstCast::Sitofp:
2084 if (isVectorType(Dest->getType())) { 2120 if (isVectorType(Dest->getType())) {
2085 assert(Dest->getType() == IceType_v4f32 && 2121 assert(Dest->getType() == IceType_v4f32 &&
2086 Inst->getSrc(0)->getType() == IceType_v4i32); 2122 Inst->getSrc(0)->getType() == IceType_v4i32);
2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2123 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2124 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2089 Src0RM = legalizeToReg(Src0RM); 2125 Src0RM = legalizeToReg(Src0RM);
2090 Variable *T = makeReg(Dest->getType()); 2126 Variable *T = makeReg(Dest->getType());
2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2127 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2092 _movp(Dest, T); 2128 _movp(Dest, T);
2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2129 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2094 // Use a helper for x86-32. 2130 // Use a helper for x86-32.
2095 const SizeT MaxSrcs = 1; 2131 const SizeT MaxSrcs = 1;
2096 Type DestType = Dest->getType(); 2132 Type DestType = Dest->getType();
2097 InstCall *Call = 2133 InstCall *Call =
2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2134 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2099 : H_sitofp_i64_f64, 2135 : H_sitofp_i64_f64,
2100 Dest, MaxSrcs); 2136 Dest, MaxSrcs);
2101 // TODO: Call the correct compiler-rt helper function. 2137 // TODO: Call the correct compiler-rt helper function.
2102 Call->addArg(Inst->getSrc(0)); 2138 Call->addArg(Inst->getSrc(0));
2103 lowerCall(Call); 2139 lowerCall(Call);
2104 return; 2140 return;
2105 } else { 2141 } else {
2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2142 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2107 // Sign-extend the operand. 2143 // Sign-extend the operand.
2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2144 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2109 Variable *T_1 = makeReg(IceType_i32); 2145 Variable *T_1 = nullptr;
2146 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2147 T_1 = makeReg(IceType_i64);
2148 } else {
2149 assert(Src0RM->getType() != IceType_i64);
2150 T_1 = makeReg(IceType_i32);
2151 }
2152 T_1->setWeightInfinite();
2110 Variable *T_2 = makeReg(Dest->getType()); 2153 Variable *T_2 = makeReg(Dest->getType());
2111 if (Src0RM->getType() == IceType_i32) 2154 if (Src0RM->getType() == T_1->getType())
2112 _mov(T_1, Src0RM); 2155 _mov(T_1, Src0RM);
2113 else 2156 else
2114 _movsx(T_1, Src0RM); 2157 _movsx(T_1, Src0RM);
2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2158 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2116 _mov(Dest, T_2); 2159 _mov(Dest, T_2);
2117 } 2160 }
2118 break; 2161 break;
2119 case InstCast::Uitofp: { 2162 case InstCast::Uitofp: {
2120 Operand *Src0 = Inst->getSrc(0); 2163 Operand *Src0 = Inst->getSrc(0);
2121 if (isVectorType(Src0->getType())) { 2164 if (isVectorType(Src0->getType())) {
2122 assert(Dest->getType() == IceType_v4f32 && 2165 assert(Dest->getType() == IceType_v4f32 &&
2123 Src0->getType() == IceType_v4i32); 2166 Src0->getType() == IceType_v4i32);
2124 const SizeT MaxSrcs = 1; 2167 const SizeT MaxSrcs = 1;
2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2168 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2126 Call->addArg(Src0); 2169 Call->addArg(Src0);
2127 lowerCall(Call); 2170 lowerCall(Call);
2128 } else if (Src0->getType() == IceType_i64 || 2171 } else if (Src0->getType() == IceType_i64 ||
2129 Src0->getType() == IceType_i32) { 2172 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2130 // Use a helper for x86-32 and x86-64. Also use a helper for 2173 // Use a helper for x86-32 and x86-64. Also use a helper for
2131 // i32 on x86-32. 2174 // i32 on x86-32.
2132 const SizeT MaxSrcs = 1; 2175 const SizeT MaxSrcs = 1;
2133 Type DestType = Dest->getType(); 2176 Type DestType = Dest->getType();
2134 IceString TargetString; 2177 IceString TargetString;
2135 if (isInt32Asserting32Or64(Src0->getType())) { 2178 if (isInt32Asserting32Or64(Src0->getType())) {
2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2179 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2137 : H_uitofp_i32_f64; 2180 : H_uitofp_i32_f64;
2138 } else { 2181 } else {
2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2182 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2140 : H_uitofp_i64_f64; 2183 : H_uitofp_i64_f64;
2141 } 2184 }
2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2185 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2143 Call->addArg(Src0); 2186 Call->addArg(Src0);
2144 lowerCall(Call); 2187 lowerCall(Call);
2145 return; 2188 return;
2146 } else { 2189 } else {
2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2190 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2148 // Zero-extend the operand. 2191 // Zero-extend the operand.
2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2192 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2150 Variable *T_1 = makeReg(IceType_i32); 2193 Variable *T_1 = nullptr;
2194 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2195 T_1 = makeReg(IceType_i64);
2196 } else {
2197 assert(Src0RM->getType() != IceType_i64);
2198 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2199 T_1 = makeReg(IceType_i32);
2200 }
2201 T_1->setWeightInfinite();
2151 Variable *T_2 = makeReg(Dest->getType()); 2202 Variable *T_2 = makeReg(Dest->getType());
2152 if (Src0RM->getType() == IceType_i32) 2203 if (Src0RM->getType() == T_1->getType())
2153 _mov(T_1, Src0RM); 2204 _mov(T_1, Src0RM);
2154 else 2205 else
2155 _movzx(T_1, Src0RM); 2206 _movzx(T_1, Src0RM);
2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2207 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2157 _mov(Dest, T_2); 2208 _mov(Dest, T_2);
2158 } 2209 }
2159 break; 2210 break;
2160 } 2211 }
2161 case InstCast::Bitcast: { 2212 case InstCast::Bitcast: {
2162 Operand *Src0 = Inst->getSrc(0); 2213 Operand *Src0 = Inst->getSrc(0);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
2198 typename Traits::SpillVariable *SpillVar = 2249 typename Traits::SpillVariable *SpillVar =
2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2250 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2200 SpillVar->setLinkedTo(Dest); 2251 SpillVar->setLinkedTo(Dest);
2201 Variable *Spill = SpillVar; 2252 Variable *Spill = SpillVar;
2202 Spill->setWeight(RegWeight::Zero); 2253 Spill->setWeight(RegWeight::Zero);
2203 _mov(T, Src0RM); 2254 _mov(T, Src0RM);
2204 _mov(Spill, T); 2255 _mov(Spill, T);
2205 _mov(Dest, Spill); 2256 _mov(Dest, Spill);
2206 } break; 2257 } break;
2207 case IceType_i64: { 2258 case IceType_i64: {
2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2259 assert(Src0->getType() == IceType_f64);
2209 assert(Src0RM->getType() == IceType_f64); 2260 if (Traits::Is64Bit) {
2210 // a.i64 = bitcast b.f64 ==> 2261 // Movd requires its fp argument (in this case, the bitcast source) to
2211 // s.f64 = spill b.f64 2262 // be an xmm register.
2212 // t_lo.i32 = lo(s.f64) 2263 Operand *Src0R = legalize(Src0, Legal_Reg);
Jim Stichnoth 2015/08/10 19:39:20 Maybe this? Variable *Src0R = legalizeToReg(Src0
John 2015/08/10 20:41:17 Done.
2213 // a_lo.i32 = t_lo.i32 2264 Variable *T = makeReg(IceType_i64);
2214 // t_hi.i32 = hi(s.f64) 2265 _movd(T, Src0R);
2215 // a_hi.i32 = t_hi.i32 2266 _mov(Dest, T);
2216 Operand *SpillLo, *SpillHi; 2267 } else {
2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2268 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2269 // a.i64 = bitcast b.f64 ==>
2270 // s.f64 = spill b.f64
2271 // t_lo.i32 = lo(s.f64)
2272 // a_lo.i32 = t_lo.i32
2273 // t_hi.i32 = hi(s.f64)
2274 // a_hi.i32 = t_hi.i32
2275 Operand *SpillLo, *SpillHi;
2276 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2277 typename Traits::SpillVariable *SpillVar =
2278 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2279 SpillVar->setLinkedTo(Src0Var);
2280 Variable *Spill = SpillVar;
2281 Spill->setWeight(RegWeight::Zero);
2282 _movq(Spill, Src0RM);
2283 SpillLo = Traits::VariableSplit::create(Func, Spill,
2284 Traits::VariableSplit::Low);
2285 SpillHi = Traits::VariableSplit::create(Func, Spill,
2286 Traits::VariableSplit::High);
2287 } else {
2288 SpillLo = loOperand(Src0RM);
2289 SpillHi = hiOperand(Src0RM);
2290 }
2291
2292 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2293 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2294 Variable *T_Lo = makeReg(IceType_i32);
2295 Variable *T_Hi = makeReg(IceType_i32);
2296
2297 _mov(T_Lo, SpillLo);
2298 _mov(DestLo, T_Lo);
2299 _mov(T_Hi, SpillHi);
2300 _mov(DestHi, T_Hi);
2301 }
2302 } break;
2303 case IceType_f64: {
2304 assert(Src0->getType() == IceType_i64);
2305 if (Traits::Is64Bit) {
2306 Operand *Src0R = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth 2015/08/10 19:39:20 Name this Src0RM
John 2015/08/10 20:41:17 Done.
2307 Variable *T = makeReg(IceType_f64);
2308 // Movd requires its fp argument (in this case, the bitcast destination)
2309 // to be an xmm register.
2310 T->setWeightInfinite();
2311 _movd(T, Src0R);
2312 _mov(Dest, T);
2313 } else {
2314 Src0 = legalize(Src0);
2315 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2316 Variable *T = Func->makeVariable(Dest->getType());
2317 _movq(T, Src0);
2318 _movq(Dest, T);
2319 break;
2320 }
2321 // a.f64 = bitcast b.i64 ==>
2322 // t_lo.i32 = b_lo.i32
2323 // FakeDef(s.f64)
2324 // lo(s.f64) = t_lo.i32
2325 // t_hi.i32 = b_hi.i32
2326 // hi(s.f64) = t_hi.i32
2327 // a.f64 = s.f64
2218 typename Traits::SpillVariable *SpillVar = 2328 typename Traits::SpillVariable *SpillVar =
2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2329 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2220 SpillVar->setLinkedTo(Src0Var); 2330 SpillVar->setLinkedTo(Dest);
2221 Variable *Spill = SpillVar; 2331 Variable *Spill = SpillVar;
2222 Spill->setWeight(RegWeight::Zero); 2332 Spill->setWeight(RegWeight::Zero);
2223 _movq(Spill, Src0RM); 2333
2224 SpillLo = Traits::VariableSplit::create(Func, Spill, 2334 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2225 Traits::VariableSplit::Low); 2335 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2226 SpillHi = Traits::VariableSplit::create(Func, Spill, 2336 Func, Spill, Traits::VariableSplit::Low);
2227 Traits::VariableSplit::High); 2337 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2228 } else { 2338 Func, Spill, Traits::VariableSplit::High);
2229 SpillLo = loOperand(Src0RM); 2339 _mov(T_Lo, loOperand(Src0));
2230 SpillHi = hiOperand(Src0RM); 2340 // Technically, the Spill is defined after the _store happens, but
2341 // SpillLo is considered a "use" of Spill so define Spill before it
2342 // is used.
2343 Context.insert(InstFakeDef::create(Func, Spill));
2344 _store(T_Lo, SpillLo);
2345 _mov(T_Hi, hiOperand(Src0));
2346 _store(T_Hi, SpillHi);
2347 _movq(Dest, Spill);
2231 } 2348 }
2232
2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2235 Variable *T_Lo = makeReg(IceType_i32);
2236 Variable *T_Hi = makeReg(IceType_i32);
2237
2238 _mov(T_Lo, SpillLo);
2239 _mov(DestLo, T_Lo);
2240 _mov(T_Hi, SpillHi);
2241 _mov(DestHi, T_Hi);
2242 } break;
2243 case IceType_f64: {
2244 Src0 = legalize(Src0);
2245 assert(Src0->getType() == IceType_i64);
2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2247 Variable *T = Func->makeVariable(Dest->getType());
2248 _movq(T, Src0);
2249 _movq(Dest, T);
2250 break;
2251 }
2252 // a.f64 = bitcast b.i64 ==>
2253 // t_lo.i32 = b_lo.i32
2254 // FakeDef(s.f64)
2255 // lo(s.f64) = t_lo.i32
2256 // t_hi.i32 = b_hi.i32
2257 // hi(s.f64) = t_hi.i32
2258 // a.f64 = s.f64
2259 typename Traits::SpillVariable *SpillVar =
2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2261 SpillVar->setLinkedTo(Dest);
2262 Variable *Spill = SpillVar;
2263 Spill->setWeight(RegWeight::Zero);
2264
2265 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2267 Func, Spill, Traits::VariableSplit::Low);
2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2269 Func, Spill, Traits::VariableSplit::High);
2270 _mov(T_Lo, loOperand(Src0));
2271 // Technically, the Spill is defined after the _store happens, but
2272 // SpillLo is considered a "use" of Spill so define Spill before it
2273 // is used.
2274 Context.insert(InstFakeDef::create(Func, Spill));
2275 _store(T_Lo, SpillLo);
2276 _mov(T_Hi, hiOperand(Src0));
2277 _store(T_Hi, SpillHi);
2278 _movq(Dest, Spill);
2279 } break; 2349 } break;
2280 case IceType_v8i1: { 2350 case IceType_v8i1: {
2281 assert(Src0->getType() == IceType_i8); 2351 assert(Src0->getType() == IceType_i8);
2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); 2352 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); 2353 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2284 // Arguments to functions are required to be at least 32 bits wide. 2354 // Arguments to functions are required to be at least 32 bits wide.
2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); 2355 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2286 Call->addArg(Src0AsI32); 2356 Call->addArg(Src0AsI32);
2287 lowerCall(Call); 2357 lowerCall(Call);
2288 } break; 2358 } break;
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after
2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2678 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2609 _pxor(T, MinusOne); 2679 _pxor(T, MinusOne);
2610 } break; 2680 } break;
2611 } 2681 }
2612 2682
2613 _movp(Dest, T); 2683 _movp(Dest, T);
2614 eliminateNextVectorSextInstruction(Dest); 2684 eliminateNextVectorSextInstruction(Dest);
2615 return; 2685 return;
2616 } 2686 }
2617 2687
2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2688 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2619 if (Src0->getType() == IceType_i64) { 2689 lowerIcmp64(Inst);
2620 InstIcmp::ICond Condition = Inst->getCondition();
2621 size_t Index = static_cast<size_t>(Condition);
2622 assert(Index < Traits::TableIcmp64Size);
2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2627 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2628 Constant *One = Ctx->getConstantInt32(1);
2629 typename Traits::Insts::Label *LabelFalse =
2630 Traits::Insts::Label::create(Func, this);
2631 typename Traits::Insts::Label *LabelTrue =
2632 Traits::Insts::Label::create(Func, this);
2633 _mov(Dest, One);
2634 _cmp(Src0HiRM, Src1HiRI);
2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2639 _cmp(Src0LoRM, Src1LoRI);
2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2641 Context.insert(LabelFalse);
2642 _mov_nonkillable(Dest, Zero);
2643 Context.insert(LabelTrue);
2644 return; 2690 return;
2645 } 2691 }
2646 2692
2647 // cmp b, c 2693 // cmp b, c
2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2694 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2649 _cmp(Src0RM, Src1); 2695 _cmp(Src0RM, Src1);
2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); 2696 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
2651 } 2697 }
2652 2698
2699 template <typename Machine>
2700 template <typename T>
2701 typename std::enable_if<!T::Is64Bit, void>::type
2702 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
2703 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2704 Operand *Src0 = legalize(Inst->getSrc(0));
2705 Operand *Src1 = legalize(Inst->getSrc(1));
2706 Variable *Dest = Inst->getDest();
2707 InstIcmp::ICond Condition = Inst->getCondition();
2708 size_t Index = static_cast<size_t>(Condition);
2709 assert(Index < Traits::TableIcmp64Size);
2710 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2711 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2712 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2713 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2714 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2715 Constant *One = Ctx->getConstantInt32(1);
2716 typename Traits::Insts::Label *LabelFalse =
2717 Traits::Insts::Label::create(Func, this);
2718 typename Traits::Insts::Label *LabelTrue =
2719 Traits::Insts::Label::create(Func, this);
2720 _mov(Dest, One);
2721 _cmp(Src0HiRM, Src1HiRI);
2722 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2723 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2724 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2725 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2726 _cmp(Src0LoRM, Src1LoRI);
2727 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2728 Context.insert(LabelFalse);
2729 _mov_nonkillable(Dest, Zero);
2730 Context.insert(LabelTrue);
2731 }
2732
2653 template <class Machine> 2733 template <class Machine>
2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 2734 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2735 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2736 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2657 ConstantInteger32 *ElementIndex = 2737 ConstantInteger32 *ElementIndex =
2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 2738 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2659 // Only constant indices are allowed in PNaCl IR. 2739 // Only constant indices are allowed in PNaCl IR.
2660 assert(ElementIndex); 2740 assert(ElementIndex);
2661 unsigned Index = ElementIndex->getValue(); 2741 unsigned Index = ElementIndex->getValue();
2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2742 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
2841 } 2921 }
2842 case Intrinsics::AtomicLoad: { 2922 case Intrinsics::AtomicLoad: {
2843 // We require the memory address to be naturally aligned. 2923 // We require the memory address to be naturally aligned.
2844 // Given that is the case, then normal loads are atomic. 2924 // Given that is the case, then normal loads are atomic.
2845 if (!Intrinsics::isMemoryOrderValid( 2925 if (!Intrinsics::isMemoryOrderValid(
2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 2926 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2847 Func->setError("Unexpected memory ordering for AtomicLoad"); 2927 Func->setError("Unexpected memory ordering for AtomicLoad");
2848 return; 2928 return;
2849 } 2929 }
2850 Variable *Dest = Instr->getDest(); 2930 Variable *Dest = Instr->getDest();
2851 if (Dest->getType() == IceType_i64) { 2931 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2852 // Follow what GCC does and use a movq instead of what lowerLoad() 2932 // Follow what GCC does and use a movq instead of what lowerLoad()
2853 // normally does (split the load into two). 2933 // normally does (split the load into two).
2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2934 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2855 // can't happen anyway, since this is x86-32 and integer arithmetic only 2935 // can't happen anyway, since this is x86-32 and integer arithmetic only
2856 // happens on 32-bit quantities. 2936 // happens on 32-bit quantities.
2857 Variable *T = makeReg(IceType_f64); 2937 Variable *T = makeReg(IceType_f64);
2858 typename Traits::X86OperandMem *Addr = 2938 typename Traits::X86OperandMem *Addr =
2859 formMemoryOperand(Instr->getArg(0), IceType_f64); 2939 formMemoryOperand(Instr->getArg(0), IceType_f64);
2860 _movq(T, Addr); 2940 _movq(T, Addr);
2861 // Then cast the bits back out of the XMM register to the i64 Dest. 2941 // Then cast the bits back out of the XMM register to the i64 Dest.
2862 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 2942 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2863 lowerCast(Cast); 2943 lowerCast(Cast);
2864 // Make sure that the atomic load isn't elided when unused. 2944 // Make sure that the atomic load isn't elided when unused.
2865 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 2945 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2866 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 2946 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2867 return; 2947 return;
2868 } 2948 }
2869 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 2949 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2870 lowerLoad(Load); 2950 lowerLoad(Load);
2871 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 2951 // Make sure the atomic load isn't elided when unused, by adding a
Jim Stichnoth 2015/08/10 19:39:19 hmm, why is a linebreak added?
John 2015/08/10 20:41:17 make format.
Jim Stichnoth 2015/08/11 16:01:36 Oh, so something like, you added an outer layer of
John 2015/08/12 19:27:55 Probably. I find annoying that clang format will b
2952 // FakeUse.
2872 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 2953 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2873 // insert the FakeUse on the last-inserted instruction's dest. 2954 // insert the FakeUse on the last-inserted instruction's dest.
2874 Context.insert( 2955 Context.insert(
2875 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 2956 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2876 return; 2957 return;
2877 } 2958 }
2878 case Intrinsics::AtomicRMW: 2959 case Intrinsics::AtomicRMW:
2879 if (!Intrinsics::isMemoryOrderValid( 2960 if (!Intrinsics::isMemoryOrderValid(
2880 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 2961 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
2881 Func->setError("Unexpected memory ordering for AtomicRMW"); 2962 Func->setError("Unexpected memory ordering for AtomicRMW");
2882 return; 2963 return;
2883 } 2964 }
2884 lowerAtomicRMW( 2965 lowerAtomicRMW(
2885 Instr->getDest(), 2966 Instr->getDest(),
2886 static_cast<uint32_t>( 2967 static_cast<uint32_t>(
2887 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 2968 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
2888 Instr->getArg(1), Instr->getArg(2)); 2969 Instr->getArg(1), Instr->getArg(2));
2889 return; 2970 return;
2890 case Intrinsics::AtomicStore: { 2971 case Intrinsics::AtomicStore: {
2891 if (!Intrinsics::isMemoryOrderValid( 2972 if (!Intrinsics::isMemoryOrderValid(
2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 2973 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2893 Func->setError("Unexpected memory ordering for AtomicStore"); 2974 Func->setError("Unexpected memory ordering for AtomicStore");
2894 return; 2975 return;
2895 } 2976 }
2896 // We require the memory address to be naturally aligned. 2977 // We require the memory address to be naturally aligned.
2897 // Given that is the case, then normal stores are atomic. 2978 // Given that is the case, then normal stores are atomic.
2898 // Add a fence after the store to make it visible. 2979 // Add a fence after the store to make it visible.
2899 Operand *Value = Instr->getArg(0); 2980 Operand *Value = Instr->getArg(0);
2900 Operand *Ptr = Instr->getArg(1); 2981 Operand *Ptr = Instr->getArg(1);
2901 if (Value->getType() == IceType_i64) { 2982 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
2902 // Use a movq instead of what lowerStore() normally does 2983 // Use a movq instead of what lowerStore() normally does
2903 // (split the store into two), following what GCC does. 2984 // (split the store into two), following what GCC does.
2904 // Cast the bits from int -> to an xmm register first. 2985 // Cast the bits from int -> to an xmm register first.
2905 Variable *T = makeReg(IceType_f64); 2986 Variable *T = makeReg(IceType_f64);
2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2907 lowerCast(Cast); 2988 lowerCast(Cast);
2908 // Then store XMM w/ a movq. 2989 // Then store XMM w/ a movq.
2909 typename Traits::X86OperandMem *Addr = 2990 typename Traits::X86OperandMem *Addr =
2910 formMemoryOperand(Ptr, IceType_f64); 2991 formMemoryOperand(Ptr, IceType_f64);
2911 _storeq(T, Addr); 2992 _storeq(T, Addr);
2912 _mfence(); 2993 _mfence();
2913 return; 2994 return;
2914 } 2995 }
2915 InstStore *Store = InstStore::create(Func, Value, Ptr); 2996 InstStore *Store = InstStore::create(Func, Value, Ptr);
2916 lowerStore(Store); 2997 lowerStore(Store);
2917 _mfence(); 2998 _mfence();
2918 return; 2999 return;
2919 } 3000 }
2920 case Intrinsics::Bswap: { 3001 case Intrinsics::Bswap: {
2921 Variable *Dest = Instr->getDest(); 3002 Variable *Dest = Instr->getDest();
2922 Operand *Val = Instr->getArg(0); 3003 Operand *Val = Instr->getArg(0);
2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the 3004 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2924 // argument must be a register. Use rotate left for 16-bit bswap. 3005 // argument must be a register. Use rotate left for 16-bit bswap.
2925 if (Val->getType() == IceType_i64) { 3006 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2926 Val = legalizeUndef(Val); 3007 Val = legalizeUndef(Val);
2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); 3008 Variable *T_Lo = legalizeToReg(loOperand(Val));
2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); 3009 Variable *T_Hi = legalizeToReg(hiOperand(Val));
2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2931 _bswap(T_Lo); 3012 _bswap(T_Lo);
2932 _bswap(T_Hi); 3013 _bswap(T_Hi);
2933 _mov(DestLo, T_Hi); 3014 _mov(DestLo, T_Hi);
2934 _mov(DestHi, T_Lo); 3015 _mov(DestHi, T_Lo);
2935 } else if (Val->getType() == IceType_i32) { 3016 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
3017 Val->getType() == IceType_i32) {
2936 Variable *T = legalizeToReg(Val); 3018 Variable *T = legalizeToReg(Val);
2937 _bswap(T); 3019 _bswap(T);
2938 _mov(Dest, T); 3020 _mov(Dest, T);
2939 } else { 3021 } else {
2940 assert(Val->getType() == IceType_i16); 3022 assert(Val->getType() == IceType_i16);
2941 Constant *Eight = Ctx->getConstantInt16(8); 3023 Constant *Eight = Ctx->getConstantInt16(8);
2942 Variable *T = nullptr; 3024 Variable *T = nullptr;
2943 Val = legalize(Val); 3025 Val = legalize(Val);
2944 _mov(T, Val); 3026 _mov(T, Val);
2945 _rol(T, Eight); 3027 _rol(T, Eight);
2946 _mov(Dest, T); 3028 _mov(Dest, T);
2947 } 3029 }
2948 return; 3030 return;
2949 } 3031 }
2950 case Intrinsics::Ctpop: { 3032 case Intrinsics::Ctpop: {
2951 Variable *Dest = Instr->getDest(); 3033 Variable *Dest = Instr->getDest();
3034 Variable *T = nullptr;
2952 Operand *Val = Instr->getArg(0); 3035 Operand *Val = Instr->getArg(0);
2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3036 Type ValTy = Val->getType();
2954 ? H_call_ctpop_i32 3037 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
2955 : H_call_ctpop_i64, 3038
2956 Dest, 1); 3039 if (!Traits::Is64Bit) {
3040 T = Dest;
3041 } else {
3042 T = makeReg(IceType_i64);
3043 if (ValTy == IceType_i32) {
3044 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
3045 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
3046 // ensure we will not have any bits set on Val's upper 32 bits.
3047 Variable *V = makeReg(IceType_i64);
3048 _movzx(V, Val);
3049 Val = V;
3050 }
3051 ValTy = IceType_i64;
3052 }
3053
3054 InstCall *Call = makeHelperCall(
3055 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
2957 Call->addArg(Val); 3056 Call->addArg(Val);
2958 lowerCall(Call); 3057 lowerCall(Call);
2959 // The popcount helpers always return 32-bit values, while the intrinsic's 3058 // The popcount helpers always return 32-bit values, while the intrinsic's
2960 // signature matches the native POPCNT instruction and fills a 64-bit reg 3059 // signature matches the native POPCNT instruction and fills a 64-bit reg
2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case 3060 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2962 // the user doesn't do that in the IR. If the user does that in the IR, 3061 // the user doesn't do that in the IR. If the user does that in the IR,
2963 // then this zero'ing instruction is dead and gets optimized out. 3062 // then this zero'ing instruction is dead and gets optimized out.
2964 if (Val->getType() == IceType_i64) { 3063 if (!Traits::Is64Bit) {
2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3064 assert(T == Dest);
2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3065 if (Val->getType() == IceType_i64) {
2967 _mov(DestHi, Zero); 3066 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3067 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3068 _mov(DestHi, Zero);
3069 }
3070 } else {
3071 assert(Val->getType() == IceType_i64);
3072 // T is 64 bit. It needs to be copied to dest. We need to:
3073 //
3074 // T_1.32 = trunc T.64 to i32
3075 // T_2.64 = zext T_1.32 to i64
3076 // Dest.<<right_size>> = T_2.<<right_size>>
3077 //
3078 // which ensures the upper 32 bits will always be cleared. Just doing a
3079 //
 3080 // mov Dest.32 = trunc T.32 to i32
3081 //
3082 // is dangerous because there's a chance the copiler will optimize this
Jim Stichnoth 2015/08/10 19:39:20 compiler
John 2015/08/10 20:41:17 Done.
3083 // copy out. To use _movzx we need two new registers (one 32-, and
3084 // another 64-bit wide.)
3085 Variable *T_1 = makeReg(IceType_i32);
3086 _mov(T_1, T);
3087 Variable *T_2 = makeReg(IceType_i64);
3088 _movzx(T_2, T_1);
3089 _mov(Dest, T_2);
2968 } 3090 }
2969 return; 3091 return;
2970 } 3092 }
2971 case Intrinsics::Ctlz: { 3093 case Intrinsics::Ctlz: {
2972 // The "is zero undef" parameter is ignored and we always return 3094 // The "is zero undef" parameter is ignored and we always return
2973 // a well-defined value. 3095 // a well-defined value.
2974 Operand *Val = legalize(Instr->getArg(0)); 3096 Operand *Val = legalize(Instr->getArg(0));
2975 Operand *FirstVal; 3097 Operand *FirstVal;
2976 Operand *SecondVal = nullptr; 3098 Operand *SecondVal = nullptr;
2977 if (Val->getType() == IceType_i64) { 3099 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2978 FirstVal = loOperand(Val); 3100 FirstVal = loOperand(Val);
2979 SecondVal = hiOperand(Val); 3101 SecondVal = hiOperand(Val);
2980 } else { 3102 } else {
2981 FirstVal = Val; 3103 FirstVal = Val;
2982 } 3104 }
2983 const bool IsCttz = false; 3105 const bool IsCttz = false;
2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3106 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2985 SecondVal); 3107 SecondVal);
2986 return; 3108 return;
2987 } 3109 }
2988 case Intrinsics::Cttz: { 3110 case Intrinsics::Cttz: {
2989 // The "is zero undef" parameter is ignored and we always return 3111 // The "is zero undef" parameter is ignored and we always return
2990 // a well-defined value. 3112 // a well-defined value.
2991 Operand *Val = legalize(Instr->getArg(0)); 3113 Operand *Val = legalize(Instr->getArg(0));
2992 Operand *FirstVal; 3114 Operand *FirstVal;
2993 Operand *SecondVal = nullptr; 3115 Operand *SecondVal = nullptr;
2994 if (Val->getType() == IceType_i64) { 3116 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2995 FirstVal = hiOperand(Val); 3117 FirstVal = hiOperand(Val);
2996 SecondVal = loOperand(Val); 3118 SecondVal = loOperand(Val);
2997 } else { 3119 } else {
2998 FirstVal = Val; 3120 FirstVal = Val;
2999 } 3121 }
3000 const bool IsCttz = true; 3122 const bool IsCttz = true;
3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3123 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3002 SecondVal); 3124 SecondVal);
3003 return; 3125 return;
3004 } 3126 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3098 Func->setError("Should not be lowering UnknownIntrinsic"); 3220 Func->setError("Should not be lowering UnknownIntrinsic");
3099 return; 3221 return;
3100 } 3222 }
3101 return; 3223 return;
3102 } 3224 }
3103 3225
3104 template <class Machine> 3226 template <class Machine>
3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3227 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3106 Operand *Ptr, Operand *Expected, 3228 Operand *Ptr, Operand *Expected,
3107 Operand *Desired) { 3229 Operand *Desired) {
3108 if (Expected->getType() == IceType_i64) { 3230 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
3109 // Reserve the pre-colored registers first, before adding any more 3231 // Reserve the pre-colored registers first, before adding any more
3110 // infinite-weight variables from formMemoryOperand's legalization. 3232 // infinite-weight variables from formMemoryOperand's legalization.
3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3233 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3234 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3235 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3236 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3115 _mov(T_eax, loOperand(Expected)); 3237 _mov(T_eax, loOperand(Expected));
3116 _mov(T_edx, hiOperand(Expected)); 3238 _mov(T_edx, hiOperand(Expected));
3117 _mov(T_ebx, loOperand(Desired)); 3239 _mov(T_ebx, loOperand(Desired));
3118 _mov(T_ecx, hiOperand(Desired)); 3240 _mov(T_ecx, hiOperand(Desired));
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
3157 // [%y_phi = ...] // list of phi stores 3279 // [%y_phi = ...] // list of phi stores
3158 // br eq, %l1, %l2 3280 // br eq, %l1, %l2
3159 InstList::iterator I = Context.getCur(); 3281 InstList::iterator I = Context.getCur();
3160 // I is currently the InstIntrinsicCall. Peek past that. 3282 // I is currently the InstIntrinsicCall. Peek past that.
3161 // This assumes that the atomic cmpxchg has not been lowered yet, 3283 // This assumes that the atomic cmpxchg has not been lowered yet,
3162 // so that the instructions seen in the scan from "Cur" is simple. 3284 // so that the instructions seen in the scan from "Cur" is simple.
3163 assert(llvm::isa<InstIntrinsicCall>(*I)); 3285 assert(llvm::isa<InstIntrinsicCall>(*I));
3164 Inst *NextInst = Context.getNextInst(I); 3286 Inst *NextInst = Context.getNextInst(I);
3165 if (!NextInst) 3287 if (!NextInst)
3166 return false; 3288 return false;
3167 // There might be phi assignments right before the compare+branch, since this 3289 // There might be phi assignments right before the compare+branch, since
Jim Stichnoth 2015/08/10 19:39:20 reformat
John 2015/08/10 20:41:17 Done.
3290 // this
3168 // could be a backward branch for a loop. This placement of assignments is 3291 // could be a backward branch for a loop. This placement of assignments is
3169 // determined by placePhiStores(). 3292 // determined by placePhiStores().
3170 std::vector<InstAssign *> PhiAssigns; 3293 std::vector<InstAssign *> PhiAssigns;
3171 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) { 3294 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3172 if (PhiAssign->getDest() == Dest) 3295 if (PhiAssign->getDest() == Dest)
3173 return false; 3296 return false;
3174 PhiAssigns.push_back(PhiAssign); 3297 PhiAssigns.push_back(PhiAssign);
3175 NextInst = Context.getNextInst(I); 3298 NextInst = Context.getNextInst(I);
3176 if (!NextInst) 3299 if (!NextInst)
3177 return false; 3300 return false;
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3339 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3217 Operand *Ptr, Operand *Val) { 3340 Operand *Ptr, Operand *Val) {
3218 bool NeedsCmpxchg = false; 3341 bool NeedsCmpxchg = false;
3219 LowerBinOp Op_Lo = nullptr; 3342 LowerBinOp Op_Lo = nullptr;
3220 LowerBinOp Op_Hi = nullptr; 3343 LowerBinOp Op_Hi = nullptr;
3221 switch (Operation) { 3344 switch (Operation) {
3222 default: 3345 default:
3223 Func->setError("Unknown AtomicRMW operation"); 3346 Func->setError("Unknown AtomicRMW operation");
3224 return; 3347 return;
3225 case Intrinsics::AtomicAdd: { 3348 case Intrinsics::AtomicAdd: {
3226 if (Dest->getType() == IceType_i64) { 3349 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3227 // All the fall-through paths must set this to true, but use this 3350 // All the fall-through paths must set this to true, but use this
3228 // for asserting. 3351 // for asserting.
3229 NeedsCmpxchg = true; 3352 NeedsCmpxchg = true;
3230 Op_Lo = &TargetX86Base<Machine>::_add; 3353 Op_Lo = &TargetX86Base<Machine>::_add;
3231 Op_Hi = &TargetX86Base<Machine>::_adc; 3354 Op_Hi = &TargetX86Base<Machine>::_adc;
3232 break; 3355 break;
3233 } 3356 }
3234 typename Traits::X86OperandMem *Addr = 3357 typename Traits::X86OperandMem *Addr =
3235 formMemoryOperand(Ptr, Dest->getType()); 3358 formMemoryOperand(Ptr, Dest->getType());
3236 const bool Locked = true; 3359 const bool Locked = true;
3237 Variable *T = nullptr; 3360 Variable *T = nullptr;
3238 _mov(T, Val); 3361 _mov(T, Val);
3239 _xadd(Addr, T, Locked); 3362 _xadd(Addr, T, Locked);
3240 _mov(Dest, T); 3363 _mov(Dest, T);
3241 return; 3364 return;
3242 } 3365 }
3243 case Intrinsics::AtomicSub: { 3366 case Intrinsics::AtomicSub: {
3244 if (Dest->getType() == IceType_i64) { 3367 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3245 NeedsCmpxchg = true; 3368 NeedsCmpxchg = true;
3246 Op_Lo = &TargetX86Base<Machine>::_sub; 3369 Op_Lo = &TargetX86Base<Machine>::_sub;
3247 Op_Hi = &TargetX86Base<Machine>::_sbb; 3370 Op_Hi = &TargetX86Base<Machine>::_sbb;
3248 break; 3371 break;
3249 } 3372 }
3250 typename Traits::X86OperandMem *Addr = 3373 typename Traits::X86OperandMem *Addr =
3251 formMemoryOperand(Ptr, Dest->getType()); 3374 formMemoryOperand(Ptr, Dest->getType());
3252 const bool Locked = true; 3375 const bool Locked = true;
3253 Variable *T = nullptr; 3376 Variable *T = nullptr;
3254 _mov(T, Val); 3377 _mov(T, Val);
(...skipping 16 matching lines...) Expand all
3271 NeedsCmpxchg = true; 3394 NeedsCmpxchg = true;
3272 Op_Lo = &TargetX86Base<Machine>::_and; 3395 Op_Lo = &TargetX86Base<Machine>::_and;
3273 Op_Hi = &TargetX86Base<Machine>::_and; 3396 Op_Hi = &TargetX86Base<Machine>::_and;
3274 break; 3397 break;
3275 case Intrinsics::AtomicXor: 3398 case Intrinsics::AtomicXor:
3276 NeedsCmpxchg = true; 3399 NeedsCmpxchg = true;
3277 Op_Lo = &TargetX86Base<Machine>::_xor; 3400 Op_Lo = &TargetX86Base<Machine>::_xor;
3278 Op_Hi = &TargetX86Base<Machine>::_xor; 3401 Op_Hi = &TargetX86Base<Machine>::_xor;
3279 break; 3402 break;
3280 case Intrinsics::AtomicExchange: 3403 case Intrinsics::AtomicExchange:
3281 if (Dest->getType() == IceType_i64) { 3404 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3282 NeedsCmpxchg = true; 3405 NeedsCmpxchg = true;
3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3406 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3284 // just need to be moved to the ecx and ebx registers. 3407 // just need to be moved to the ecx and ebx registers.
3285 Op_Lo = nullptr; 3408 Op_Lo = nullptr;
3286 Op_Hi = nullptr; 3409 Op_Hi = nullptr;
3287 break; 3410 break;
3288 } 3411 }
3289 typename Traits::X86OperandMem *Addr = 3412 typename Traits::X86OperandMem *Addr =
3290 formMemoryOperand(Ptr, Dest->getType()); 3413 formMemoryOperand(Ptr, Dest->getType());
3291 Variable *T = nullptr; 3414 Variable *T = nullptr;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
3325 // .LABEL: 3448 // .LABEL:
3326 // mov <reg>, eax 3449 // mov <reg>, eax
3327 // op <reg>, [desired_adj] 3450 // op <reg>, [desired_adj]
3328 // lock cmpxchg [ptr], <reg> 3451 // lock cmpxchg [ptr], <reg>
3329 // jne .LABEL 3452 // jne .LABEL
3330 // mov <dest>, eax 3453 // mov <dest>, eax
3331 // 3454 //
3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3455 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3333 Val = legalize(Val); 3456 Val = legalize(Val);
3334 Type Ty = Val->getType(); 3457 Type Ty = Val->getType();
3335 if (Ty == IceType_i64) { 3458 if (!Traits::Is64Bit && Ty == IceType_i64) {
3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3459 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3460 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3461 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3339 _mov(T_eax, loOperand(Addr)); 3462 _mov(T_eax, loOperand(Addr));
3340 _mov(T_edx, hiOperand(Addr)); 3463 _mov(T_edx, hiOperand(Addr));
3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3464 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3465 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3343 typename Traits::Insts::Label *Label = 3466 typename Traits::Insts::Label *Label =
3344 Traits::Insts::Label::create(Func, this); 3467 Traits::Insts::Label::create(Func, this);
3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3468 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
3426 // cmovne T_DEST, IF_NOT_ZERO 3549 // cmovne T_DEST, IF_NOT_ZERO
3427 // xor T_DEST, 31 3550 // xor T_DEST, 31
3428 // mov DEST, T_DEST 3551 // mov DEST, T_DEST
3429 // 3552 //
3430 // NOTE: T_DEST must be a register because cmov requires its dest to be a 3553 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3431 // register. Also, bsf and bsr require their dest to be a register. 3554 // register. Also, bsf and bsr require their dest to be a register.
3432 // 3555 //
3433 // The xor DEST, 31 converts a bit position to # of leading zeroes. 3556 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3434 // E.g., for 000... 00001100, bsr will say that the most significant bit 3557 // E.g., for 000... 00001100, bsr will say that the most significant bit
3435 // set is at position 3, while the number of leading zeros is 28. Xor is 3558 // set is at position 3, while the number of leading zeros is 28. Xor is
3436 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case). 3559 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros
Jim Stichnoth 2015/08/10 19:39:20 reformat?
John 2015/08/10 20:41:17 Done.
3560 // case).
3437 // 3561 //
3438 // Similar for 64-bit, but start w/ speculating that the upper 32 bits 3562 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3439 // are all zero, and compute the result for that case (checking the lower 3563 // are all zero, and compute the result for that case (checking the lower
3440 // 32 bits). Then actually compute the result for the upper bits and 3564 // 32 bits). Then actually compute the result for the upper bits and
3441 // cmov in the result from the lower computation if the earlier speculation 3565 // cmov in the result from the lower computation if the earlier speculation
3442 // was correct. 3566 // was correct.
3443 // 3567 //
3444 // Cttz, is similar, but uses bsf instead, and doesn't require the xor 3568 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
3445 // bit position conversion, and the speculation is reversed. 3569 // bit position conversion, and the speculation is reversed.
3446 assert(Ty == IceType_i32 || Ty == IceType_i64); 3570 assert(Ty == IceType_i32 || Ty == IceType_i64);
(...skipping 10 matching lines...) Expand all
3457 if (Cttz) { 3581 if (Cttz) {
3458 _mov(T_Dest, ThirtyTwo); 3582 _mov(T_Dest, ThirtyTwo);
3459 } else { 3583 } else {
3460 Constant *SixtyThree = Ctx->getConstantInt32(63); 3584 Constant *SixtyThree = Ctx->getConstantInt32(63);
3461 _mov(T_Dest, SixtyThree); 3585 _mov(T_Dest, SixtyThree);
3462 } 3586 }
3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3587 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3464 if (!Cttz) { 3588 if (!Cttz) {
3465 _xor(T_Dest, ThirtyOne); 3589 _xor(T_Dest, ThirtyOne);
3466 } 3590 }
3467 if (Ty == IceType_i32) { 3591 if (Traits::Is64Bit || Ty == IceType_i32) {
3468 _mov(Dest, T_Dest); 3592 _mov(Dest, T_Dest);
3469 return; 3593 return;
3470 } 3594 }
3471 _add(T_Dest, ThirtyTwo); 3595 _add(T_Dest, ThirtyTwo);
3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3596 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3597 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3474 // Will be using "test" on this, so we need a registerized variable. 3598 // Will be using "test" on this, so we need a registerized variable.
3475 Variable *SecondVar = legalizeToReg(SecondVal); 3599 Variable *SecondVar = legalizeToReg(SecondVal);
3476 Variable *T_Dest2 = makeReg(IceType_i32); 3600 Variable *T_Dest2 = makeReg(IceType_i32);
3477 if (Cttz) { 3601 if (Cttz) {
(...skipping 19 matching lines...) Expand all
3497 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); 3621 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
3498 const bool IsCountConst = CountConst != nullptr; 3622 const bool IsCountConst = CountConst != nullptr;
3499 const bool IsValConst = ValConst != nullptr; 3623 const bool IsValConst = ValConst != nullptr;
3500 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; 3624 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3501 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0; 3625 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
3502 3626
3503 // Unlikely, but nothing to do if it does happen 3627 // Unlikely, but nothing to do if it does happen
3504 if (IsCountConst && CountValue == 0) 3628 if (IsCountConst && CountValue == 0)
3505 return; 3629 return;
3506 3630
3507 // TODO(ascull): if the count is constant but val is not it would be possible 3631 // TODO(ascull): if the count is constant but val is not it would be
3508 // to inline by spreading the value across 4 bytes and accessing subregs e.g. 3632 // possible
Jim Stichnoth 2015/08/10 19:39:20 More weird linebreak stuff
John 2015/08/10 20:41:17 Done.
3633 // to inline by spreading the value across 4 bytes and accessing subregs
3634 // e.g.
3509 // eax, ax and al. 3635 // eax, ax and al.
3510 if (IsCountConst && IsValConst) { 3636 if (IsCountConst && IsValConst) {
3511 Variable *Base = legalizeToReg(Dest); 3637 Variable *Base = legalizeToReg(Dest);
3512 // Add a FakeUse in case Base is ultimately not used, e.g. it falls back to 3638 // Add a FakeUse in case Base is ultimately not used, e.g. it falls back
3639 // to
3513 // calling memset(). Otherwise Om1 register allocation fails because this 3640 // calling memset(). Otherwise Om1 register allocation fails because this
3514 // infinite-weight variable has a definition but no uses. 3641 // infinite-weight variable has a definition but no uses.
3515 Context.insert(InstFakeUse::create(Func, Base)); 3642 Context.insert(InstFakeUse::create(Func, Base));
3516 3643
3517 // 3 is the awkward size as it is too small for the vector or 32-bit 3644 // 3 is the awkward size as it is too small for the vector or 32-bit
3518 // operations and will not work with lowerLeftOvers as there is no valid 3645 // operations and will not work with lowerLeftOvers as there is no valid
3519 // overlap. 3646 // overlap.
3520 if (CountValue == 3) { 3647 if (CountValue == 3) {
3521 Constant *Offset = nullptr; 3648 Constant *Offset = nullptr;
3522 auto *Mem = 3649 auto *Mem =
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after
3884 // Index is Index=Var-Const ==> 4011 // Index is Index=Var-Const ==>
3885 // set Index=Var, Offset-=(Const<<Shift) 4012 // set Index=Var, Offset-=(Const<<Shift)
3886 4013
3887 // TODO: consider overflow issues with respect to Offset. 4014 // TODO: consider overflow issues with respect to Offset.
3888 // TODO: handle symbolic constants. 4015 // TODO: handle symbolic constants.
3889 } 4016 }
3890 } 4017 }
3891 4018
3892 template <class Machine> 4019 template <class Machine>
3893 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4020 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
3894 // A Load instruction can be treated the same as an Assign instruction, after 4021 // A Load instruction can be treated the same as an Assign instruction,
4022 // after
3895 // the source operand is transformed into an Traits::X86OperandMem operand. 4023 // the source operand is transformed into an Traits::X86OperandMem operand.
3896 // Note that the address mode optimization already creates an 4024 // Note that the address mode optimization already creates an
3897 // Traits::X86OperandMem operand, so it doesn't need another level of 4025 // Traits::X86OperandMem operand, so it doesn't need another level of
3898 // transformation. 4026 // transformation.
3899 Variable *DestLoad = Load->getDest(); 4027 Variable *DestLoad = Load->getDest();
3900 Type Ty = DestLoad->getType(); 4028 Type Ty = DestLoad->getType();
3901 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4029 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3902 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4030 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3903 lowerAssign(Assign); 4031 lowerAssign(Assign);
3904 } 4032 }
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4046 return; 4174 return;
4047 } 4175 }
4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4176 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4049 // But if SrcT is immediate, we might be able to do better, as 4177 // But if SrcT is immediate, we might be able to do better, as
4050 // the cmov instruction doesn't allow an immediate operand: 4178 // the cmov instruction doesn't allow an immediate operand:
4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4179 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4180 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4053 std::swap(SrcT, SrcF); 4181 std::swap(SrcT, SrcF);
4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4182 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4055 } 4183 }
4056 if (DestTy == IceType_i64) { 4184 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4057 SrcT = legalizeUndef(SrcT); 4185 SrcT = legalizeUndef(SrcT);
4058 SrcF = legalizeUndef(SrcF); 4186 SrcF = legalizeUndef(SrcF);
4059 // Set the low portion. 4187 // Set the low portion.
4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4188 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4061 Variable *TLo = nullptr; 4189 Variable *TLo = nullptr;
4062 Operand *SrcFLo = legalize(loOperand(SrcF)); 4190 Operand *SrcFLo = legalize(loOperand(SrcF));
4063 _mov(TLo, SrcFLo); 4191 _mov(TLo, SrcFLo);
4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4192 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4065 _cmov(TLo, SrcTLo, Cond); 4193 _cmov(TLo, SrcTLo, Cond);
4066 _mov(DestLo, TLo); 4194 _mov(DestLo, TLo);
4067 // Set the high portion. 4195 // Set the high portion.
4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4196 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4069 Variable *THi = nullptr; 4197 Variable *THi = nullptr;
4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); 4198 Operand *SrcFHi = legalize(hiOperand(SrcF));
4071 _mov(THi, SrcFHi); 4199 _mov(THi, SrcFHi);
4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); 4200 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4073 _cmov(THi, SrcTHi, Cond); 4201 _cmov(THi, SrcTHi, Cond);
4074 _mov(DestHi, THi); 4202 _mov(DestHi, THi);
4075 return; 4203 return;
4076 } 4204 }
4077 4205
4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4206 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4207 (Traits::Is64Bit && DestTy == IceType_i64));
4079 Variable *T = nullptr; 4208 Variable *T = nullptr;
4080 SrcF = legalize(SrcF); 4209 SrcF = legalize(SrcF);
4081 _mov(T, SrcF); 4210 _mov(T, SrcF);
4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4211 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4083 _cmov(T, SrcT, Cond); 4212 _cmov(T, SrcT, Cond);
4084 _mov(Dest, T); 4213 _mov(Dest, T);
4085 } 4214 }
4086 4215
4087 template <class Machine> 4216 template <class Machine>
4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4217 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4089 Operand *Value = Inst->getData(); 4218 Operand *Value = Inst->getData();
4090 Operand *Addr = Inst->getAddr(); 4219 Operand *Addr = Inst->getAddr();
4091 typename Traits::X86OperandMem *NewAddr = 4220 typename Traits::X86OperandMem *NewAddr =
4092 formMemoryOperand(Addr, Value->getType()); 4221 formMemoryOperand(Addr, Value->getType());
4093 Type Ty = NewAddr->getType(); 4222 Type Ty = NewAddr->getType();
4094 4223
4095 if (Ty == IceType_i64) { 4224 if (!Traits::Is64Bit && Ty == IceType_i64) {
4096 Value = legalizeUndef(Value); 4225 Value = legalizeUndef(Value);
4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4226 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4227 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4099 _store(ValueHi, 4228 _store(ValueHi,
4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); 4229 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4101 _store(ValueLo, 4230 _store(ValueLo,
4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); 4231 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4103 } else if (isVectorType(Ty)) { 4232 } else if (isVectorType(Ty)) {
4104 _storep(legalizeToReg(Value), NewAddr); 4233 _storep(legalizeToReg(Value), NewAddr);
4105 } else { 4234 } else {
(...skipping 27 matching lines...) Expand all
4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4262 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4134 Context.insert(NewStore); 4263 Context.insert(NewStore);
4135 } 4264 }
4136 } 4265 }
4137 4266
4138 template <class Machine> 4267 template <class Machine>
4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, 4268 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4140 uint64_t Min, uint64_t Max) { 4269 uint64_t Min, uint64_t Max) {
4141 // TODO(ascull): 64-bit should not reach here but only because it is not 4270 // TODO(ascull): 64-bit should not reach here but only because it is not
4142 // implemented yet. This should be able to handle the 64-bit case. 4271 // implemented yet. This should be able to handle the 64-bit case.
4143 assert(Comparison->getType() != IceType_i64); 4272 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
4144 // Subtracting 0 is a nop so don't do it 4273 // Subtracting 0 is a nop so don't do it
4145 if (Min != 0) { 4274 if (Min != 0) {
4146 // Avoid clobbering the comparison by copying it 4275 // Avoid clobbering the comparison by copying it
4147 Variable *T = nullptr; 4276 Variable *T = nullptr;
4148 _mov(T, Comparison); 4277 _mov(T, Comparison);
4149 _sub(T, Ctx->getConstantInt32(Min)); 4278 _sub(T, Ctx->getConstantInt32(Min));
4150 Comparison = T; 4279 Comparison = T;
4151 } 4280 }
4152 4281
4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); 4282 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
4232 4361
4233 template <class Machine> 4362 template <class Machine>
4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4363 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4235 // Group cases together and navigate through them with a binary search 4364 // Group cases together and navigate through them with a binary search
4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); 4365 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
4237 Operand *Src0 = Inst->getComparison(); 4366 Operand *Src0 = Inst->getComparison();
4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); 4367 CfgNode *DefaultTarget = Inst->getLabelDefault();
4239 4368
4240 assert(CaseClusters.size() != 0); // Should always be at least one 4369 assert(CaseClusters.size() != 0); // Should always be at least one
4241 4370
4242 if (Src0->getType() == IceType_i64) { 4371 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
4243 Src0 = legalize(Src0); // get Base/Index into physical registers 4372 Src0 = legalize(Src0); // get Base/Index into physical registers
4244 Operand *Src0Lo = loOperand(Src0); 4373 Operand *Src0Lo = loOperand(Src0);
4245 Operand *Src0Hi = hiOperand(Src0); 4374 Operand *Src0Hi = hiOperand(Src0);
4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { 4375 if (CaseClusters.back().getHigh() > UINT32_MAX) {
4247 // TODO(ascull): handle 64-bit case properly (currently naive version) 4376 // TODO(ascull): handle 64-bit case properly (currently naive version)
4248 // This might be handled by a higher level lowering of switches. 4377 // This might be handled by a higher level lowering of switches.
4249 SizeT NumCases = Inst->getNumCases(); 4378 SizeT NumCases = Inst->getNumCases();
4250 if (NumCases >= 2) { 4379 if (NumCases >= 2) {
4251 Src0Lo = legalizeToReg(Src0Lo); 4380 Src0Lo = legalizeToReg(Src0Lo);
4252 Src0Hi = legalizeToReg(Src0Hi); 4381 Src0Hi = legalizeToReg(Src0Hi);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4437 // that follows. This means that the original Store instruction is 4566 // that follows. This means that the original Store instruction is
4438 // still there, either because the value being stored is used beyond 4567 // still there, either because the value being stored is used beyond
4439 // the Store instruction, or because dead code elimination did not 4568 // the Store instruction, or because dead code elimination did not
4440 // happen. In either case, we cancel RMW lowering (and the caller 4569 // happen. In either case, we cancel RMW lowering (and the caller
4441 // deletes the RMW instruction). 4570 // deletes the RMW instruction).
4442 if (!RMW->isLastUse(RMW->getBeacon())) 4571 if (!RMW->isLastUse(RMW->getBeacon()))
4443 return; 4572 return;
4444 Operand *Src = RMW->getData(); 4573 Operand *Src = RMW->getData();
4445 Type Ty = Src->getType(); 4574 Type Ty = Src->getType();
4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4575 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4447 if (Ty == IceType_i64) { 4576 if (!Traits::Is64Bit && Ty == IceType_i64) {
4448 Src = legalizeUndef(Src); 4577 Src = legalizeUndef(Src);
4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4578 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4579 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4451 typename Traits::X86OperandMem *AddrLo = 4580 typename Traits::X86OperandMem *AddrLo =
4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); 4581 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4453 typename Traits::X86OperandMem *AddrHi = 4582 typename Traits::X86OperandMem *AddrHi =
4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); 4583 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4455 switch (RMW->getOp()) { 4584 switch (RMW->getOp()) {
4456 default: 4585 default:
4457 // TODO(stichnot): Implement other arithmetic operators. 4586 // TODO(stichnot): Implement other arithmetic operators.
(...skipping 13 matching lines...) Expand all
4471 case InstArithmetic::Or: 4600 case InstArithmetic::Or:
4472 _or_rmw(AddrLo, SrcLo); 4601 _or_rmw(AddrLo, SrcLo);
4473 _or_rmw(AddrHi, SrcHi); 4602 _or_rmw(AddrHi, SrcHi);
4474 return; 4603 return;
4475 case InstArithmetic::Xor: 4604 case InstArithmetic::Xor:
4476 _xor_rmw(AddrLo, SrcLo); 4605 _xor_rmw(AddrLo, SrcLo);
4477 _xor_rmw(AddrHi, SrcHi); 4606 _xor_rmw(AddrHi, SrcHi);
4478 return; 4607 return;
4479 } 4608 }
4480 } else { 4609 } else {
4481 // i8, i16, i32 4610 // x86-32: i8, i16, i32
4611 // x86-64: i8, i16, i32, i64
4482 switch (RMW->getOp()) { 4612 switch (RMW->getOp()) {
4483 default: 4613 default:
4484 // TODO(stichnot): Implement other arithmetic operators. 4614 // TODO(stichnot): Implement other arithmetic operators.
4485 break; 4615 break;
4486 case InstArithmetic::Add: 4616 case InstArithmetic::Add:
4487 Src = legalize(Src, Legal_Reg | Legal_Imm); 4617 Src = legalize(Src, Legal_Reg | Legal_Imm);
4488 _add_rmw(Addr, Src); 4618 _add_rmw(Addr, Src);
4489 return; 4619 return;
4490 case InstArithmetic::Sub: 4620 case InstArithmetic::Sub:
4491 Src = legalize(Src, Legal_Reg | Legal_Imm); 4621 Src = legalize(Src, Legal_Reg | Legal_Imm);
(...skipping 24 matching lines...) Expand all
4516 } else { 4646 } else {
4517 TargetLowering::lowerOther(Instr); 4647 TargetLowering::lowerOther(Instr);
4518 } 4648 }
4519 } 4649 }
4520 4650
4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4651 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4522 /// preserve integrity of liveness analysis. Undef values are also 4652 /// preserve integrity of liveness analysis. Undef values are also
4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4653 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4524 /// Undef input. 4654 /// Undef input.
4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4655 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4526 // Pause constant blinding or pooling, blinding or pooling will be done later 4656 if (Traits::Is64Bit) {
4657 // On x86-64 we don't need to prelower phis -- the architecture can handle
4658 // 64-bit integer natively.
4659 return;
4660 }
4661
4662 // Pause constant blinding or pooling, blinding or pooling will be done
4663 // later
4527 // during phi lowering assignments 4664 // during phi lowering assignments
4528 BoolFlagSaver B(RandomizationPoolingPaused, true); 4665 BoolFlagSaver B(RandomizationPoolingPaused, true);
4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4666 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4530 this, Context.getNode(), Func); 4667 this, Context.getNode(), Func);
4531 } 4668 }
4532 4669
4533 // There is no support for loading or emitting vector constants, so the 4670 // There is no support for loading or emitting vector constants, so the
4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4671 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4535 // etc. are initialized with register operations. 4672 // etc. are initialized with register operations.
4536 // 4673 //
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { 4815 if (auto *Const = llvm::dyn_cast<Constant>(From)) {
4679 if (llvm::isa<ConstantUndef>(Const)) { 4816 if (llvm::isa<ConstantUndef>(Const)) {
4680 From = legalizeUndef(Const, RegNum); 4817 From = legalizeUndef(Const, RegNum);
4681 if (isVectorType(Ty)) 4818 if (isVectorType(Ty))
4682 return From; 4819 return From;
4683 Const = llvm::cast<Constant>(From); 4820 Const = llvm::cast<Constant>(From);
4684 } 4821 }
4685 // There should be no constants of vector type (other than undef). 4822 // There should be no constants of vector type (other than undef).
4686 assert(!isVectorType(Ty)); 4823 assert(!isVectorType(Ty));
4687 4824
4825 // If the operand is a 64 bit constant integer we need to legalize it to a
4826 // register in x86-64.
4827 if (Traits::Is64Bit) {
4828 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Const)) {
Jim Stichnoth 2015/08/10 19:39:20 Use isa<> instead of dyn_cast<>.
John 2015/08/10 20:41:17 Is there any rule for isa v. dyn_cast? In this case
Jim Stichnoth 2015/08/11 16:01:36 You're probably right about equivalent code generation
John 2015/08/12 19:27:54 Fair enough. Done.
4829 Variable *V = copyToReg(C, RegNum);
4830 V->setWeightInfinite();
4831 return V;
4832 }
4833 }
4834
4688 // If the operand is an 32 bit constant integer, we should check 4835 // If the operand is an 32 bit constant integer, we should check
4689 // whether we need to randomize it or pool it. 4836 // whether we need to randomize it or pool it.
4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { 4837 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); 4838 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
4692 if (NewConst != Const) { 4839 if (NewConst != Const) {
4693 return NewConst; 4840 return NewConst;
4694 } 4841 }
4695 } 4842 }
4696 4843
4697 // Convert a scalar floating point constant into an explicit 4844 // Convert a scalar floating point constant into an explicit
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
4815 } 4962 }
4816 // Do legalization, which contains randomization/pooling 4963 // Do legalization, which contains randomization/pooling
4817 // or do randomization/pooling. 4964 // or do randomization/pooling.
4818 return llvm::cast<typename Traits::X86OperandMem>( 4965 return llvm::cast<typename Traits::X86OperandMem>(
4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 4966 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
4820 } 4967 }
4821 4968
4822 template <class Machine> 4969 template <class Machine>
4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 4970 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
4824 // There aren't any 64-bit integer registers for x86-32. 4971 // There aren't any 64-bit integer registers for x86-32.
4825 assert(Type != IceType_i64); 4972 assert(Traits::Is64Bit || Type != IceType_i64);
4826 Variable *Reg = Func->makeVariable(Type); 4973 Variable *Reg = Func->makeVariable(Type);
4827 if (RegNum == Variable::NoRegister) 4974 if (RegNum == Variable::NoRegister)
4828 Reg->setWeightInfinite(); 4975 Reg->setWeightInfinite();
4829 else 4976 else
4830 Reg->setRegNum(RegNum); 4977 Reg->setRegNum(RegNum);
4831 return Reg; 4978 return Reg;
4832 } 4979 }
4833 4980
4834 template <class Machine> void TargetX86Base<Machine>::postLower() { 4981 template <class Machine> void TargetX86Base<Machine>::postLower() {
4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) 4982 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 11 matching lines...) Expand all
4847 4994
4848 template <class Machine> 4995 template <class Machine>
4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 4996 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
4850 if (!BuildDefs::dump()) 4997 if (!BuildDefs::dump())
4851 return; 4998 return;
4852 Ostream &Str = Ctx->getStrEmit(); 4999 Ostream &Str = Ctx->getStrEmit();
4853 Str << getConstantPrefix() << C->getValue(); 5000 Str << getConstantPrefix() << C->getValue();
4854 } 5001 }
4855 5002
4856 template <class Machine> 5003 template <class Machine>
4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { 5004 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 5005 if (!Traits::Is64Bit) {
5006 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
5007 } else {
5008 if (!BuildDefs::dump())
5009 return;
5010 Ostream &Str = Ctx->getStrEmit();
5011 Str << getConstantPrefix() << C->getValue();
5012 }
4859 } 5013 }
4860 5014
4861 template <class Machine> 5015 template <class Machine>
4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { 5016 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
4863 if (!BuildDefs::dump()) 5017 if (!BuildDefs::dump())
4864 return; 5018 return;
4865 Ostream &Str = Ctx->getStrEmit(); 5019 Ostream &Str = Ctx->getStrEmit();
4866 C->emitPoolLabel(Str); 5020 C->emitPoolLabel(Str);
4867 } 5021 }
4868 5022
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4993 Constant *Mask1 = Ctx->getConstantInt( 5147 Constant *Mask1 = Ctx->getConstantInt(
4994 MemOperand->getOffset()->getType(), Cookie + Value); 5148 MemOperand->getOffset()->getType(), Cookie + Value);
4995 Constant *Mask2 = 5149 Constant *Mask2 =
4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5150 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
4997 5151
4998 typename Traits::X86OperandMem *TempMemOperand = 5152 typename Traits::X86OperandMem *TempMemOperand =
4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), 5153 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5000 MemOperand->getBase(), Mask1); 5154 MemOperand->getBase(), Mask1);
5001 // If we have already assigned a physical register, we must come from 5155 // If we have already assigned a physical register, we must come from
5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5156 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
 5003 // the assigned register as this assignment is the start of its use-def 5157 // the assigned register as this assignment is the start of its
 5158 // use-def
5004 // chain. So we add RegNum argument here. 5159 // chain. So we add RegNum argument here.
5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5160 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5006 _lea(RegTemp, TempMemOperand); 5161 _lea(RegTemp, TempMemOperand);
5007 // As source operand doesn't use the dstreg, we don't need to add 5162 // As source operand doesn't use the dstreg, we don't need to add
5008 // _set_dest_nonkillable(). 5163 // _set_dest_nonkillable().
5009 // But if we use the same Dest Reg, that is, with RegNum 5164 // But if we use the same Dest Reg, that is, with RegNum
5010 // assigned, we should add this _set_dest_nonkillable() 5165 // assigned, we should add this _set_dest_nonkillable()
5011 if (RegNum != Variable::NoRegister) 5166 if (RegNum != Variable::NoRegister)
5012 _set_dest_nonkillable(); 5167 _set_dest_nonkillable();
5013 5168
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
5077 } 5232 }
5078 // the offset is not eligible for blinding or pooling, return the original 5233 // the offset is not eligible for blinding or pooling, return the original
5079 // mem operand 5234 // mem operand
5080 return MemOperand; 5235 return MemOperand;
5081 } 5236 }
5082 5237
5083 } // end of namespace X86Internal 5238 } // end of namespace X86Internal
5084 } // end of namespace Ice 5239 } // end of namespace Ice
5085 5240
5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5241 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698