Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1273153002: Subzero. Native 64-bit int arithmetic on x86-64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Removes the x8664-specific xtest target. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 }; 113 };
114 114
115 template <class MachineTraits> 115 template <class MachineTraits>
116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 116 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 117 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
118 118
119 template <class MachineTraits> 119 template <class MachineTraits>
120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 120 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 121 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
122 if (llvm::isa<InstIcmp>(Instr)) { 122 if (llvm::isa<InstIcmp>(Instr)) {
123 if (Instr->getSrc(0)->getType() != IceType_i64) 123 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
124 return PK_Icmp32; 124 return PK_Icmp32;
Jim Stichnoth 2015/08/10 19:39:20 Probably need to rename PK_Icmp32 to something ref
John 2015/08/10 20:41:17 TODO'ed.
Jim Stichnoth 2015/08/11 16:01:36 TODO'ed, dude? TODONE.
125 return PK_None; // TODO(stichnot): actually PK_Icmp64; 125 return PK_None; // TODO(stichnot): actually PK_Icmp64;
126 } 126 }
127 return PK_None; // TODO(stichnot): remove this 127 return PK_None; // TODO(stichnot): remove this
128 128
129 if (llvm::isa<InstFcmp>(Instr)) 129 if (llvm::isa<InstFcmp>(Instr))
130 return PK_Fcmp; 130 return PK_Fcmp;
131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 131 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
132 switch (Cast->getCastKind()) { 132 switch (Cast->getCastKind()) {
133 default: 133 default:
134 return PK_None; 134 return PK_None;
(...skipping 501 matching lines...) Expand 10 before | Expand all | Expand 10 after
636 // instruction or equivalent. 636 // instruction or equivalent.
637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 637 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
638 // An InstLoad always qualifies. 638 // An InstLoad always qualifies.
639 LoadDest = Load->getDest(); 639 LoadDest = Load->getDest();
640 const bool DoLegalize = false; 640 const bool DoLegalize = false;
641 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 641 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
642 LoadDest->getType(), DoLegalize); 642 LoadDest->getType(), DoLegalize);
643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 643 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
644 // An AtomicLoad intrinsic qualifies as long as it has a valid 644 // An AtomicLoad intrinsic qualifies as long as it has a valid
645 // memory ordering, and can be implemented in a single 645 // memory ordering, and can be implemented in a single
646 // instruction (i.e., not i64). 646 // instruction (i.e., not i64 on x86-32).
647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 647 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
648 if (ID == Intrinsics::AtomicLoad && 648 if (ID == Intrinsics::AtomicLoad &&
649 Intrin->getDest()->getType() != IceType_i64 && 649 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
650 Intrinsics::isMemoryOrderValid( 650 Intrinsics::isMemoryOrderValid(
651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 651 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
652 LoadDest = Intrin->getDest(); 652 LoadDest = Intrin->getDest();
653 const bool DoLegalize = false; 653 const bool DoLegalize = false;
654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 654 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
655 DoLegalize); 655 DoLegalize);
656 } 656 }
657 } 657 }
658 // A Load instruction can be folded into the following 658 // A Load instruction can be folded into the following
659 // instruction only if the following instruction ends the Load's 659 // instruction only if the following instruction ends the Load's
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
717 template <class Machine> 717 template <class Machine>
718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 718 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 719 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
720 return Br->optimizeBranch(NextNode); 720 return Br->optimizeBranch(NextNode);
721 } 721 }
722 return false; 722 return false;
723 } 723 }
724 724
725 template <class Machine> 725 template <class Machine>
726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 726 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
727 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
728 if (RegNum == Traits::RegisterSet::Reg_esp ||
729 RegNum == Traits::RegisterSet::Reg_ebp)
730 Ty = Traits::WordType;
727 if (Ty == IceType_void) 731 if (Ty == IceType_void)
728 Ty = IceType_i32; 732 Ty = IceType_i32;
729 if (PhysicalRegisters[Ty].empty()) 733 if (PhysicalRegisters[Ty].empty())
730 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 734 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
731 assert(RegNum < PhysicalRegisters[Ty].size()); 735 assert(RegNum < PhysicalRegisters[Ty].size());
732 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 736 Variable *Reg = PhysicalRegisters[Ty][RegNum];
733 if (Reg == nullptr) { 737 if (Reg == nullptr) {
734 Reg = Func->makeVariable(Ty); 738 Reg = Func->makeVariable(Ty);
735 Reg->setRegNum(RegNum); 739 Reg->setRegNum(RegNum);
736 PhysicalRegisters[Ty][RegNum] = Reg; 740 PhysicalRegisters[Ty][RegNum] = Reg;
(...skipping 26 matching lines...) Expand all
763 } 767 }
764 int32_t Offset = Var->getStackOffset(); 768 int32_t Offset = Var->getStackOffset();
765 int32_t BaseRegNum = Var->getBaseRegNum(); 769 int32_t BaseRegNum = Var->getBaseRegNum();
766 if (BaseRegNum == Variable::NoRegister) { 770 if (BaseRegNum == Variable::NoRegister) {
767 BaseRegNum = getFrameOrStackReg(); 771 BaseRegNum = getFrameOrStackReg();
768 if (!hasFramePointer()) 772 if (!hasFramePointer())
769 Offset += getStackAdjustment(); 773 Offset += getStackAdjustment();
770 } 774 }
771 if (Offset) 775 if (Offset)
772 Str << Offset; 776 Str << Offset;
773 const Type FrameSPTy = IceType_i32; 777 const Type FrameSPTy = Traits::WordType;
774 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 778 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
775 } 779 }
776 780
777 template <class Machine> 781 template <class Machine>
778 typename TargetX86Base<Machine>::Traits::Address 782 typename TargetX86Base<Machine>::Traits::Address
779 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 783 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
780 if (Var->hasReg()) 784 if (Var->hasReg())
781 llvm_unreachable("Stack Variable has a register assigned"); 785 llvm_unreachable("Stack Variable has a register assigned");
782 if (Var->getWeight().isInf()) { 786 if (Var->getWeight().isInf()) {
783 llvm_unreachable("Infinite-weight Variable has no register assigned"); 787 llvm_unreachable("Infinite-weight Variable has no register assigned");
(...skipping 19 matching lines...) Expand all
803 /// function generates an instruction to copy Arg into its assigned 807 /// function generates an instruction to copy Arg into its assigned
804 /// register if applicable. 808 /// register if applicable.
805 template <class Machine> 809 template <class Machine>
806 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 810 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
807 Variable *FramePtr, 811 Variable *FramePtr,
808 size_t BasicFrameOffset, 812 size_t BasicFrameOffset,
809 size_t &InArgsSizeBytes) { 813 size_t &InArgsSizeBytes) {
810 Variable *Lo = Arg->getLo(); 814 Variable *Lo = Arg->getLo();
811 Variable *Hi = Arg->getHi(); 815 Variable *Hi = Arg->getHi();
812 Type Ty = Arg->getType(); 816 Type Ty = Arg->getType();
813 if (Lo && Hi && Ty == IceType_i64) { 817 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
814 // TODO(jpp): This special case is not needed for x86-64.
815 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 818 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
816 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 819 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
817 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 820 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
818 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 821 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
819 return; 822 return;
820 } 823 }
821 if (isVectorType(Ty)) { 824 if (isVectorType(Ty)) {
822 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 825 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
823 } 826 }
824 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 827 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
825 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 828 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
826 if (Arg->hasReg()) { 829 if (Arg->hasReg()) {
827 assert(Ty != IceType_i64); 830 assert(Ty != IceType_i64 || Traits::Is64Bit);
828 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 831 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
829 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 832 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
830 if (isVectorType(Arg->getType())) { 833 if (isVectorType(Arg->getType())) {
831 _movp(Arg, Mem); 834 _movp(Arg, Mem);
832 } else { 835 } else {
833 _mov(Arg, Mem); 836 _mov(Arg, Mem);
834 } 837 }
835 // This argument-copying instruction uses an explicit Traits::X86OperandMem 838 // This argument-copying instruction uses an explicit Traits::X86OperandMem
836 // operand instead of a Variable, so its fill-from-stack operation has to be 839 // operand instead of a Variable, so its fill-from-stack operation has to be
837 // tracked separately for statistics. 840 // tracked separately for statistics.
838 Ctx->statsUpdateFills(); 841 Ctx->statsUpdateFills();
839 } 842 }
840 } 843 }
841 844
842 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 845 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
843 // TODO(jpp): this is wrong for x86-64. 846 return Traits::WordType;
844 return IceType_i32;
845 } 847 }
846 848
847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { 849 template <class Machine>
850 template <typename T>
851 typename std::enable_if<!T::Is64Bit, void>::type
852 TargetX86Base<Machine>::split64(Variable *Var) {
848 switch (Var->getType()) { 853 switch (Var->getType()) {
849 default: 854 default:
850 return; 855 return;
851 case IceType_i64: 856 case IceType_i64:
852 // TODO: Only consider F64 if we need to push each half when 857 // TODO: Only consider F64 if we need to push each half when
853 // passing as an argument to a function call. Note that each half 858 // passing as an argument to a function call. Note that each half
854 // is still typed as I32. 859 // is still typed as I32.
855 case IceType_f64: 860 case IceType_f64:
856 break; 861 break;
857 } 862 }
(...skipping 11 matching lines...) Expand all
869 Hi->setName(Func, Var->getName(Func) + "__hi"); 874 Hi->setName(Func, Var->getName(Func) + "__hi");
870 } 875 }
871 Var->setLoHi(Lo, Hi); 876 Var->setLoHi(Lo, Hi);
872 if (Var->getIsArg()) { 877 if (Var->getIsArg()) {
873 Lo->setIsArg(); 878 Lo->setIsArg();
874 Hi->setIsArg(); 879 Hi->setIsArg();
875 } 880 }
876 } 881 }
877 882
878 template <class Machine> 883 template <class Machine>
879 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) { 884 template <typename T>
885 typename std::enable_if<!T::Is64Bit, Operand>::type *
886 TargetX86Base<Machine>::loOperand(Operand *Operand) {
880 assert(Operand->getType() == IceType_i64 || 887 assert(Operand->getType() == IceType_i64 ||
881 Operand->getType() == IceType_f64); 888 Operand->getType() == IceType_f64);
882 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 889 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
883 return Operand; 890 return Operand;
884 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 891 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
885 split64(Var); 892 split64(Var);
886 return Var->getLo(); 893 return Var->getLo();
887 } 894 }
888 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 895 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
889 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 896 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
890 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 897 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
891 // Check if we need to blind/pool the constant. 898 // Check if we need to blind/pool the constant.
892 return legalize(ConstInt); 899 return legalize(ConstInt);
893 } 900 }
894 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { 901 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
895 auto *MemOperand = Traits::X86OperandMem::create( 902 auto *MemOperand = Traits::X86OperandMem::create(
896 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 903 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
897 Mem->getShift(), Mem->getSegmentRegister()); 904 Mem->getShift(), Mem->getSegmentRegister());
898 // Test if we should randomize or pool the offset, if so randomize it or 905 // Test if we should randomize or pool the offset, if so randomize it or
899 // pool it then create mem operand with the blinded/pooled constant. 906 // pool it then create mem operand with the blinded/pooled constant.
900 // Otherwise, return the mem operand as ordinary mem operand. 907 // Otherwise, return the mem operand as ordinary mem operand.
901 return legalize(MemOperand); 908 return legalize(MemOperand);
902 } 909 }
903 llvm_unreachable("Unsupported operand type"); 910 llvm_unreachable("Unsupported operand type");
904 return nullptr; 911 return nullptr;
905 } 912 }
906 913
907 template <class Machine> 914 template <class Machine>
908 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 915 template <typename T>
916 typename std::enable_if<!T::Is64Bit, Operand>::type *
917 TargetX86Base<Machine>::hiOperand(Operand *Operand) {
909 assert(Operand->getType() == IceType_i64 || 918 assert(Operand->getType() == IceType_i64 ||
910 Operand->getType() == IceType_f64); 919 Operand->getType() == IceType_f64);
911 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 920 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
912 return Operand; 921 return Operand;
913 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 922 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
914 split64(Var); 923 split64(Var);
915 return Var->getHi(); 924 return Var->getHi();
916 } 925 }
917 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 926 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
918 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( 927 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>(
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
1100 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1109 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1101 Variable *Dest = Inst->getDest(); 1110 Variable *Dest = Inst->getDest();
1102 Operand *Src0 = legalize(Inst->getSrc(0)); 1111 Operand *Src0 = legalize(Inst->getSrc(0));
1103 Operand *Src1 = legalize(Inst->getSrc(1)); 1112 Operand *Src1 = legalize(Inst->getSrc(1));
1104 if (Inst->isCommutative()) { 1113 if (Inst->isCommutative()) {
1105 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1114 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1106 std::swap(Src0, Src1); 1115 std::swap(Src0, Src1);
1107 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1116 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1108 std::swap(Src0, Src1); 1117 std::swap(Src0, Src1);
1109 } 1118 }
1110 if (Dest->getType() == IceType_i64) { 1119 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1111 // These helper-call-involved instructions are lowered in this 1120 // These x86-32 helper-call-involved instructions are lowered in this
1112 // separate switch. This is because loOperand() and hiOperand() 1121 // separate switch. This is because loOperand() and hiOperand()
1113 // may insert redundant instructions for constant blinding and 1122 // may insert redundant instructions for constant blinding and
1114 // pooling. Such redundant instructions will fail liveness analysis 1123 // pooling. Such redundant instructions will fail liveness analysis
1115 // under -Om1 setting. And, actually these arguments do not need 1124 // under -Om1 setting. And, actually these arguments do not need
1116 // to be processed with loOperand() and hiOperand() to be used. 1125 // to be processed with loOperand() and hiOperand() to be used.
1117 switch (Inst->getOp()) { 1126 switch (Inst->getOp()) {
1118 case InstArithmetic::Udiv: { 1127 case InstArithmetic::Udiv: {
1119 const SizeT MaxSrcs = 2; 1128 const SizeT MaxSrcs = 2;
1120 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1129 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1121 Call->addArg(Inst->getSrc(0)); 1130 Call->addArg(Inst->getSrc(0));
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after
1649 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1658 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1650 // this ever becomes a problem we can introduce a pseudo rem instruction 1659 // this ever becomes a problem we can introduce a pseudo rem instruction
1651 // that returns the remainder in %al directly (and uses a mov for copying 1660 // that returns the remainder in %al directly (and uses a mov for copying
1652 // %ah to %al.) 1661 // %ah to %al.)
1653 static constexpr uint8_t AlSizeInBits = 8; 1662 static constexpr uint8_t AlSizeInBits = 8;
1654 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1663 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1655 _mov(Dest, T); 1664 _mov(Dest, T);
1656 Context.insert(InstFakeUse::create(Func, T_eax)); 1665 Context.insert(InstFakeUse::create(Func, T_eax));
1657 } else { 1666 } else {
1658 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1667 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1659 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1668 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1669 _mov(T_edx, Zero);
1660 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1670 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1661 _div(T_edx, Src1, T); 1671 _div(T_edx, Src1, T);
1662 _mov(Dest, T_edx); 1672 _mov(Dest, T_edx);
1663 } 1673 }
1664 break; 1674 break;
1665 case InstArithmetic::Srem: 1675 case InstArithmetic::Srem:
1666 // TODO(stichnot): Enable this after doing better performance 1676 // TODO(stichnot): Enable this after doing better performance
1667 // and cross testing. 1677 // and cross testing.
1668 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1678 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1669 // Optimize mod by constant power of 2, but not for Om1 or O0, 1679 // Optimize mod by constant power of 2, but not for Om1 or O0,
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1714 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1724 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1715 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1725 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1716 // this ever becomes a problem we can introduce a pseudo rem instruction 1726 // this ever becomes a problem we can introduce a pseudo rem instruction
1717 // that returns the remainder in %al directly (and uses a mov for copying 1727 // that returns the remainder in %al directly (and uses a mov for copying
1718 // %ah to %al.) 1728 // %ah to %al.)
1719 static constexpr uint8_t AlSizeInBits = 8; 1729 static constexpr uint8_t AlSizeInBits = 8;
1720 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1730 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1721 _mov(Dest, T); 1731 _mov(Dest, T);
1722 Context.insert(InstFakeUse::create(Func, T_eax)); 1732 Context.insert(InstFakeUse::create(Func, T_eax));
1723 } else { 1733 } else {
1724 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1734 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1725 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1735 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1726 _cbwdq(T_edx, T); 1736 _cbwdq(T_edx, T);
1727 _idiv(T_edx, Src1, T); 1737 _idiv(T_edx, Src1, T);
1728 _mov(Dest, T_edx); 1738 _mov(Dest, T_edx);
1729 } 1739 }
1730 break; 1740 break;
1731 case InstArithmetic::Fadd: 1741 case InstArithmetic::Fadd:
1732 _mov(T, Src0); 1742 _mov(T, Src0);
1733 _addss(T, Src1); 1743 _addss(T, Src1);
1734 _mov(Dest, T); 1744 _mov(Dest, T);
(...skipping 23 matching lines...) Expand all
1758 return lowerCall(Call); 1768 return lowerCall(Call);
1759 } 1769 }
1760 } 1770 }
1761 } 1771 }
1762 1772
1763 template <class Machine> 1773 template <class Machine>
1764 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1774 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1765 Variable *Dest = Inst->getDest(); 1775 Variable *Dest = Inst->getDest();
1766 Operand *Src0 = Inst->getSrc(0); 1776 Operand *Src0 = Inst->getSrc(0);
1767 assert(Dest->getType() == Src0->getType()); 1777 assert(Dest->getType() == Src0->getType());
1768 if (Dest->getType() == IceType_i64) { 1778 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1769 Src0 = legalize(Src0); 1779 Src0 = legalize(Src0);
1770 Operand *Src0Lo = loOperand(Src0); 1780 Operand *Src0Lo = loOperand(Src0);
1771 Operand *Src0Hi = hiOperand(Src0); 1781 Operand *Src0Hi = hiOperand(Src0);
1772 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1782 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1773 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1783 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1774 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1784 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1775 _mov(T_Lo, Src0Lo); 1785 _mov(T_Lo, Src0Lo);
1776 _mov(DestLo, T_Lo); 1786 _mov(DestLo, T_Lo);
1777 _mov(T_Hi, Src0Hi); 1787 _mov(T_Hi, Src0Hi);
1778 _mov(DestHi, T_Hi); 1788 _mov(DestHi, T_Hi);
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
1863 SizeT ShiftAmount = 1873 SizeT ShiftAmount =
1864 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1874 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
1865 1; 1875 1;
1866 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1876 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1867 Variable *T = makeReg(DestTy); 1877 Variable *T = makeReg(DestTy);
1868 _movp(T, Src0RM); 1878 _movp(T, Src0RM);
1869 _psll(T, ShiftConstant); 1879 _psll(T, ShiftConstant);
1870 _psra(T, ShiftConstant); 1880 _psra(T, ShiftConstant);
1871 _movp(Dest, T); 1881 _movp(Dest, T);
1872 } 1882 }
1873 } else if (Dest->getType() == IceType_i64) { 1883 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1874 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1884 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1875 Constant *Shift = Ctx->getConstantInt32(31); 1885 Constant *Shift = Ctx->getConstantInt32(31);
1876 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1886 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1877 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1887 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1878 Variable *T_Lo = makeReg(DestLo->getType()); 1888 Variable *T_Lo = makeReg(DestLo->getType());
1879 if (Src0RM->getType() == IceType_i32) { 1889 if (Src0RM->getType() == IceType_i32) {
1880 _mov(T_Lo, Src0RM); 1890 _mov(T_Lo, Src0RM);
1881 } else if (Src0RM->getType() == IceType_i1) { 1891 } else if (Src0RM->getType() == IceType_i1) {
1882 _movzx(T_Lo, Src0RM); 1892 _movzx(T_Lo, Src0RM);
1883 _shl(T_Lo, Shift); 1893 _shl(T_Lo, Shift);
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1923 case InstCast::Zext: { 1933 case InstCast::Zext: {
1924 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1934 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1925 if (isVectorType(Dest->getType())) { 1935 if (isVectorType(Dest->getType())) {
1926 // onemask = materialize(1,1,...); dest = onemask & src 1936 // onemask = materialize(1,1,...); dest = onemask & src
1927 Type DestTy = Dest->getType(); 1937 Type DestTy = Dest->getType();
1928 Variable *OneMask = makeVectorOfOnes(DestTy); 1938 Variable *OneMask = makeVectorOfOnes(DestTy);
1929 Variable *T = makeReg(DestTy); 1939 Variable *T = makeReg(DestTy);
1930 _movp(T, Src0RM); 1940 _movp(T, Src0RM);
1931 _pand(T, OneMask); 1941 _pand(T, OneMask);
1932 _movp(Dest, T); 1942 _movp(Dest, T);
1933 } else if (Dest->getType() == IceType_i64) { 1943 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1934 // t1=movzx src; dst.lo=t1; dst.hi=0 1944 // t1=movzx src; dst.lo=t1; dst.hi=0
1935 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1945 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1936 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1946 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1937 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1947 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1938 Variable *Tmp = makeReg(DestLo->getType()); 1948 Variable *Tmp = makeReg(DestLo->getType());
1939 if (Src0RM->getType() == IceType_i32) { 1949 if (Src0RM->getType() == IceType_i32) {
1940 _mov(Tmp, Src0RM); 1950 _mov(Tmp, Src0RM);
1941 } else { 1951 } else {
1942 _movzx(Tmp, Src0RM); 1952 _movzx(Tmp, Src0RM);
1943 } 1953 }
1944 if (Src0RM->getType() == IceType_i1) { 1954 if (Src0RM->getType() == IceType_i1) {
1945 Constant *One = Ctx->getConstantInt32(1); 1955 Constant *One = Ctx->getConstantInt32(1);
1946 _and(Tmp, One); 1956 _and(Tmp, One);
1947 } 1957 }
1948 _mov(DestLo, Tmp); 1958 _mov(DestLo, Tmp);
1949 _mov(DestHi, Zero); 1959 _mov(DestHi, Zero);
1950 } else if (Src0RM->getType() == IceType_i1) { 1960 } else if (Src0RM->getType() == IceType_i1) {
1951 // t = Src0RM; t &= 1; Dest = t 1961 // t = Src0RM; t &= 1; Dest = t
1952 Constant *One = Ctx->getConstantInt32(1); 1962 Constant *One = Ctx->getConstantInt32(1);
1953 Type DestTy = Dest->getType(); 1963 Type DestTy = Dest->getType();
1954 Variable *T; 1964 Variable *T;
1955 if (DestTy == IceType_i8) { 1965 T = makeReg(IceType_i32);
1956 T = makeReg(DestTy); 1966 _mov(T, Src0RM);
1957 _mov(T, Src0RM); 1967 _and(T, One);
1958 } else { 1968 if (!Traits::Is64Bit) {
1959 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 1969 assert(DestTy != IceType_i64);
1960 T = makeReg(IceType_i32); 1970 } else if (DestTy == IceType_i64) {
1961 _movzx(T, Src0RM); 1971 // In x86-64 we should be able to rely on mov reg, reg to zero extend T
1972 // into Dest. At this point we can't ensure Dest will live in a
1973 // register. Therefore, we use _movzx, which the assembler rightly
1974 // converts to a 32-bit mov. A new temporary is created because the
1975 // assembler does not know how to movzx to a memory location.
1976 Variable *T_1 = makeReg(IceType_i64);
1977 _movzx(T_1, T);
1978 T = T_1;
1962 } 1979 }
1963 _and(T, One);
1964 _mov(Dest, T); 1980 _mov(Dest, T);
1965 } else { 1981 } else {
1966 // t1 = movzx src; dst = t1 1982 // t1 = movzx src; dst = t1
1967 Variable *T = makeReg(Dest->getType()); 1983 Variable *T = makeReg(Dest->getType());
1968 _movzx(T, Src0RM); 1984 _movzx(T, Src0RM);
1969 _mov(Dest, T); 1985 _mov(Dest, T);
1970 } 1986 }
1971 break; 1987 break;
1972 } 1988 }
1973 case InstCast::Trunc: { 1989 case InstCast::Trunc: {
1974 if (isVectorType(Dest->getType())) { 1990 if (isVectorType(Dest->getType())) {
1975 // onemask = materialize(1,1,...); dst = src & onemask 1991 // onemask = materialize(1,1,...); dst = src & onemask
1976 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1992 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1977 Type Src0Ty = Src0RM->getType(); 1993 Type Src0Ty = Src0RM->getType();
1978 Variable *OneMask = makeVectorOfOnes(Src0Ty); 1994 Variable *OneMask = makeVectorOfOnes(Src0Ty);
1979 Variable *T = makeReg(Dest->getType()); 1995 Variable *T = makeReg(Dest->getType());
1980 _movp(T, Src0RM); 1996 _movp(T, Src0RM);
1981 _pand(T, OneMask); 1997 _pand(T, OneMask);
1982 _movp(Dest, T); 1998 _movp(Dest, T);
1983 } else { 1999 } else {
1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2000 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1985 if (Src0->getType() == IceType_i64) 2001 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
1986 Src0 = loOperand(Src0); 2002 Src0 = loOperand(Src0);
1987 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2003 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
1988 // t1 = trunc Src0RM; Dest = t1 2004 // t1 = trunc Src0RM; Dest = t1
1989 Variable *T = nullptr; 2005 Variable *T = nullptr;
1990 _mov(T, Src0RM); 2006 _mov(T, Src0RM);
1991 if (Dest->getType() == IceType_i1) 2007 if (Dest->getType() == IceType_i1)
1992 _and(T, Ctx->getConstantInt1(1)); 2008 _and(T, Ctx->getConstantInt1(1));
1993 _mov(Dest, T); 2009 _mov(Dest, T);
1994 } 2010 }
1995 break; 2011 break;
(...skipping 10 matching lines...) Expand all
2006 case InstCast::Fptosi: 2022 case InstCast::Fptosi:
2007 if (isVectorType(Dest->getType())) { 2023 if (isVectorType(Dest->getType())) {
2008 assert(Dest->getType() == IceType_v4i32 && 2024 assert(Dest->getType() == IceType_v4i32 &&
2009 Inst->getSrc(0)->getType() == IceType_v4f32); 2025 Inst->getSrc(0)->getType() == IceType_v4f32);
2010 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2026 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2011 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2027 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2012 Src0RM = legalizeToReg(Src0RM); 2028 Src0RM = legalizeToReg(Src0RM);
2013 Variable *T = makeReg(Dest->getType()); 2029 Variable *T = makeReg(Dest->getType());
2014 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2030 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2015 _movp(Dest, T); 2031 _movp(Dest, T);
2016 } else if (Dest->getType() == IceType_i64) { 2032 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2017 // Use a helper for converting floating-point values to 64-bit 2033 // Use a helper for converting floating-point values to 64-bit
2018 // integers. SSE2 appears to have no way to convert from xmm 2034 // integers. SSE2 appears to have no way to convert from xmm
2019 // registers to something like the edx:eax register pair, and 2035 // registers to something like the edx:eax register pair, and
2020 // gcc and clang both want to use x87 instructions complete with 2036 // gcc and clang both want to use x87 instructions complete with
2021 // temporary manipulation of the status word. This helper is 2037 // temporary manipulation of the status word. This helper is
2022 // not needed for x86-64. 2038 // not needed for x86-64.
2023 split64(Dest); 2039 split64(Dest);
2024 const SizeT MaxSrcs = 1; 2040 const SizeT MaxSrcs = 1;
2025 Type SrcType = Inst->getSrc(0)->getType(); 2041 Type SrcType = Inst->getSrc(0)->getType();
2026 InstCall *Call = 2042 InstCall *Call =
2027 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2043 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2028 : H_fptosi_f64_i64, 2044 : H_fptosi_f64_i64,
2029 Dest, MaxSrcs); 2045 Dest, MaxSrcs);
2030 Call->addArg(Inst->getSrc(0)); 2046 Call->addArg(Inst->getSrc(0));
2031 lowerCall(Call); 2047 lowerCall(Call);
2032 } else { 2048 } else {
2033 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2049 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2034 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2050 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2035 Variable *T_1 = makeReg(IceType_i32); 2051 Variable *T_1 = nullptr;
2052 if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
2053 T_1 = makeReg(IceType_i64);
2054 } else {
2055 assert(Dest->getType() != IceType_i64);
2056 T_1 = makeReg(IceType_i32);
2057 }
2058 // cvt() requires its integer argument to be a GPR.
2059 T_1->setWeightInfinite();
2036 Variable *T_2 = makeReg(Dest->getType()); 2060 Variable *T_2 = makeReg(Dest->getType());
2037 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2061 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2038 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2062 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2039 if (Dest->getType() == IceType_i1) 2063 if (Dest->getType() == IceType_i1)
2040 _and(T_2, Ctx->getConstantInt1(1)); 2064 _and(T_2, Ctx->getConstantInt1(1));
2041 _mov(Dest, T_2); 2065 _mov(Dest, T_2);
2042 } 2066 }
2043 break; 2067 break;
2044 case InstCast::Fptoui: 2068 case InstCast::Fptoui:
2045 if (isVectorType(Dest->getType())) { 2069 if (isVectorType(Dest->getType())) {
2046 assert(Dest->getType() == IceType_v4i32 && 2070 assert(Dest->getType() == IceType_v4i32 &&
2047 Inst->getSrc(0)->getType() == IceType_v4f32); 2071 Inst->getSrc(0)->getType() == IceType_v4f32);
2048 const SizeT MaxSrcs = 1; 2072 const SizeT MaxSrcs = 1;
2049 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2073 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2050 Call->addArg(Inst->getSrc(0)); 2074 Call->addArg(Inst->getSrc(0));
2051 lowerCall(Call); 2075 lowerCall(Call);
2052 } else if (Dest->getType() == IceType_i64 || 2076 } else if (Dest->getType() == IceType_i64 ||
2053 Dest->getType() == IceType_i32) { 2077 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2054 // Use a helper for both x86-32 and x86-64. 2078 // Use a helper for both x86-32 and x86-64.
2055 split64(Dest); 2079 if (!Traits::Is64Bit)
2080 split64(Dest);
2056 const SizeT MaxSrcs = 1; 2081 const SizeT MaxSrcs = 1;
2057 Type DestType = Dest->getType(); 2082 Type DestType = Dest->getType();
2058 Type SrcType = Inst->getSrc(0)->getType(); 2083 Type SrcType = Inst->getSrc(0)->getType();
2059 IceString TargetString; 2084 IceString TargetString;
2060 if (isInt32Asserting32Or64(DestType)) { 2085 if (Traits::Is64Bit) {
2086 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
Jim Stichnoth 2015/08/10 19:39:20 Does it make sense to combine the first and third
John 2015/08/10 20:41:17 I would rather not mix the cases. This is more exp
2087 : H_fptoui_f64_i64;
2088 } else if (isInt32Asserting32Or64(DestType)) {
2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2089 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2062 : H_fptoui_f64_i32; 2090 : H_fptoui_f64_i32;
2063 } else { 2091 } else {
2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2092 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2065 : H_fptoui_f64_i64; 2093 : H_fptoui_f64_i64;
2066 } 2094 }
2067 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2095 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2068 Call->addArg(Inst->getSrc(0)); 2096 Call->addArg(Inst->getSrc(0));
2069 lowerCall(Call); 2097 lowerCall(Call);
2070 return; 2098 return;
2071 } else { 2099 } else {
2072 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2100 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2073 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2101 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2074 Variable *T_1 = makeReg(IceType_i32); 2102 assert(Dest->getType() != IceType_i64);
2103 Variable *T_1 = nullptr;
2104 if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
2105 T_1 = makeReg(IceType_i64);
2106 } else {
2107 assert(Dest->getType() != IceType_i32);
2108 T_1 = makeReg(IceType_i32);
2109 }
2110 T_1->setWeightInfinite();
2075 Variable *T_2 = makeReg(Dest->getType()); 2111 Variable *T_2 = makeReg(Dest->getType());
2076 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2112 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2077 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2113 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2078 if (Dest->getType() == IceType_i1) 2114 if (Dest->getType() == IceType_i1)
2079 _and(T_2, Ctx->getConstantInt1(1)); 2115 _and(T_2, Ctx->getConstantInt1(1));
2080 _mov(Dest, T_2); 2116 _mov(Dest, T_2);
2081 } 2117 }
2082 break; 2118 break;
2083 case InstCast::Sitofp: 2119 case InstCast::Sitofp:
2084 if (isVectorType(Dest->getType())) { 2120 if (isVectorType(Dest->getType())) {
2085 assert(Dest->getType() == IceType_v4f32 && 2121 assert(Dest->getType() == IceType_v4f32 &&
2086 Inst->getSrc(0)->getType() == IceType_v4i32); 2122 Inst->getSrc(0)->getType() == IceType_v4i32);
2087 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2123 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2088 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2124 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2089 Src0RM = legalizeToReg(Src0RM); 2125 Src0RM = legalizeToReg(Src0RM);
2090 Variable *T = makeReg(Dest->getType()); 2126 Variable *T = makeReg(Dest->getType());
2091 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2127 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2092 _movp(Dest, T); 2128 _movp(Dest, T);
2093 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2129 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2094 // Use a helper for x86-32. 2130 // Use a helper for x86-32.
2095 const SizeT MaxSrcs = 1; 2131 const SizeT MaxSrcs = 1;
2096 Type DestType = Dest->getType(); 2132 Type DestType = Dest->getType();
2097 InstCall *Call = 2133 InstCall *Call =
2098 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2134 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2099 : H_sitofp_i64_f64, 2135 : H_sitofp_i64_f64,
2100 Dest, MaxSrcs); 2136 Dest, MaxSrcs);
2101 // TODO: Call the correct compiler-rt helper function. 2137 // TODO: Call the correct compiler-rt helper function.
2102 Call->addArg(Inst->getSrc(0)); 2138 Call->addArg(Inst->getSrc(0));
2103 lowerCall(Call); 2139 lowerCall(Call);
2104 return; 2140 return;
2105 } else { 2141 } else {
2106 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2142 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2107 // Sign-extend the operand. 2143 // Sign-extend the operand.
2108 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2144 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2109 Variable *T_1 = makeReg(IceType_i32); 2145 Variable *T_1 = nullptr;
2146 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2147 T_1 = makeReg(IceType_i64);
2148 } else {
2149 assert(Src0RM->getType() != IceType_i64);
2150 T_1 = makeReg(IceType_i32);
2151 }
2152 T_1->setWeightInfinite();
2110 Variable *T_2 = makeReg(Dest->getType()); 2153 Variable *T_2 = makeReg(Dest->getType());
2111 if (Src0RM->getType() == IceType_i32) 2154 if (Src0RM->getType() == T_1->getType())
2112 _mov(T_1, Src0RM); 2155 _mov(T_1, Src0RM);
2113 else 2156 else
2114 _movsx(T_1, Src0RM); 2157 _movsx(T_1, Src0RM);
2115 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2158 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2116 _mov(Dest, T_2); 2159 _mov(Dest, T_2);
2117 } 2160 }
2118 break; 2161 break;
2119 case InstCast::Uitofp: { 2162 case InstCast::Uitofp: {
2120 Operand *Src0 = Inst->getSrc(0); 2163 Operand *Src0 = Inst->getSrc(0);
2121 if (isVectorType(Src0->getType())) { 2164 if (isVectorType(Src0->getType())) {
2122 assert(Dest->getType() == IceType_v4f32 && 2165 assert(Dest->getType() == IceType_v4f32 &&
2123 Src0->getType() == IceType_v4i32); 2166 Src0->getType() == IceType_v4i32);
2124 const SizeT MaxSrcs = 1; 2167 const SizeT MaxSrcs = 1;
2125 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2168 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2126 Call->addArg(Src0); 2169 Call->addArg(Src0);
2127 lowerCall(Call); 2170 lowerCall(Call);
2128 } else if (Src0->getType() == IceType_i64 || 2171 } else if (Src0->getType() == IceType_i64 ||
2129 Src0->getType() == IceType_i32) { 2172 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2130 // Use a helper for x86-32 and x86-64. Also use a helper for 2173 // Use a helper for x86-32 and x86-64. Also use a helper for
2131 // i32 on x86-32. 2174 // i32 on x86-32.
2132 const SizeT MaxSrcs = 1; 2175 const SizeT MaxSrcs = 1;
2133 Type DestType = Dest->getType(); 2176 Type DestType = Dest->getType();
2134 IceString TargetString; 2177 IceString TargetString;
2135 if (isInt32Asserting32Or64(Src0->getType())) { 2178 if (isInt32Asserting32Or64(Src0->getType())) {
2136 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2179 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2137 : H_uitofp_i32_f64; 2180 : H_uitofp_i32_f64;
2138 } else { 2181 } else {
2139 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2182 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2140 : H_uitofp_i64_f64; 2183 : H_uitofp_i64_f64;
2141 } 2184 }
2142 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2185 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2143 Call->addArg(Src0); 2186 Call->addArg(Src0);
2144 lowerCall(Call); 2187 lowerCall(Call);
2145 return; 2188 return;
2146 } else { 2189 } else {
2147 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2190 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2148 // Zero-extend the operand. 2191 // Zero-extend the operand.
2149 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2192 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2150 Variable *T_1 = makeReg(IceType_i32); 2193 Variable *T_1 = nullptr;
2194 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2195 T_1 = makeReg(IceType_i64);
2196 } else {
2197 assert(Src0RM->getType() != IceType_i64);
2198 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2199 T_1 = makeReg(IceType_i32);
2200 }
2201 T_1->setWeightInfinite();
2151 Variable *T_2 = makeReg(Dest->getType()); 2202 Variable *T_2 = makeReg(Dest->getType());
2152 if (Src0RM->getType() == IceType_i32) 2203 if (Src0RM->getType() == T_1->getType())
2153 _mov(T_1, Src0RM); 2204 _mov(T_1, Src0RM);
2154 else 2205 else
2155 _movzx(T_1, Src0RM); 2206 _movzx(T_1, Src0RM);
2156 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2207 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2157 _mov(Dest, T_2); 2208 _mov(Dest, T_2);
2158 } 2209 }
2159 break; 2210 break;
2160 } 2211 }
2161 case InstCast::Bitcast: { 2212 case InstCast::Bitcast: {
2162 Operand *Src0 = Inst->getSrc(0); 2213 Operand *Src0 = Inst->getSrc(0);
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
2198 typename Traits::SpillVariable *SpillVar = 2249 typename Traits::SpillVariable *SpillVar =
2199 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2250 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2200 SpillVar->setLinkedTo(Dest); 2251 SpillVar->setLinkedTo(Dest);
2201 Variable *Spill = SpillVar; 2252 Variable *Spill = SpillVar;
2202 Spill->setWeight(RegWeight::Zero); 2253 Spill->setWeight(RegWeight::Zero);
2203 _mov(T, Src0RM); 2254 _mov(T, Src0RM);
2204 _mov(Spill, T); 2255 _mov(Spill, T);
2205 _mov(Dest, Spill); 2256 _mov(Dest, Spill);
2206 } break; 2257 } break;
2207 case IceType_i64: { 2258 case IceType_i64: {
2208 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2259 assert(Src0->getType() == IceType_f64);
2209 assert(Src0RM->getType() == IceType_f64); 2260 if (Traits::Is64Bit) {
2210 // a.i64 = bitcast b.f64 ==> 2261 // Movd requires its fp argument (in this case, the bitcast source) to
2211 // s.f64 = spill b.f64 2262 // be an xmm register.
2212 // t_lo.i32 = lo(s.f64) 2263 Operand *Src0R = legalize(Src0, Legal_Reg);
Jim Stichnoth 2015/08/10 19:39:20 Maybe this? Variable *Src0R = legalizeToReg(Src0
John 2015/08/10 20:41:17 Done.
2213 // a_lo.i32 = t_lo.i32 2264 Variable *T = makeReg(IceType_i64);
2214 // t_hi.i32 = hi(s.f64) 2265 _movd(T, Src0R);
2215 // a_hi.i32 = t_hi.i32 2266 _mov(Dest, T);
2216 Operand *SpillLo, *SpillHi; 2267 } else {
2217 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2268 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2269 // a.i64 = bitcast b.f64 ==>
2270 // s.f64 = spill b.f64
2271 // t_lo.i32 = lo(s.f64)
2272 // a_lo.i32 = t_lo.i32
2273 // t_hi.i32 = hi(s.f64)
2274 // a_hi.i32 = t_hi.i32
2275 Operand *SpillLo, *SpillHi;
2276 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2277 typename Traits::SpillVariable *SpillVar =
2278 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2279 SpillVar->setLinkedTo(Src0Var);
2280 Variable *Spill = SpillVar;
2281 Spill->setWeight(RegWeight::Zero);
2282 _movq(Spill, Src0RM);
2283 SpillLo = Traits::VariableSplit::create(Func, Spill,
2284 Traits::VariableSplit::Low);
2285 SpillHi = Traits::VariableSplit::create(Func, Spill,
2286 Traits::VariableSplit::High);
2287 } else {
2288 SpillLo = loOperand(Src0RM);
2289 SpillHi = hiOperand(Src0RM);
2290 }
2291
2292 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2293 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2294 Variable *T_Lo = makeReg(IceType_i32);
2295 Variable *T_Hi = makeReg(IceType_i32);
2296
2297 _mov(T_Lo, SpillLo);
2298 _mov(DestLo, T_Lo);
2299 _mov(T_Hi, SpillHi);
2300 _mov(DestHi, T_Hi);
2301 }
2302 } break;
2303 case IceType_f64: {
2304 assert(Src0->getType() == IceType_i64);
2305 if (Traits::Is64Bit) {
2306 Operand *Src0R = legalize(Src0, Legal_Reg | Legal_Mem);
Jim Stichnoth 2015/08/10 19:39:20 Name this Src0RM
John 2015/08/10 20:41:17 Done.
2307 Variable *T = makeReg(IceType_f64);
2308 // Movd requires its fp argument (in this case, the bitcast destination)
2309 // to be an xmm register.
2310 T->setWeightInfinite();
2311 _movd(T, Src0R);
2312 _mov(Dest, T);
2313 } else {
2314 Src0 = legalize(Src0);
2315 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2316 Variable *T = Func->makeVariable(Dest->getType());
2317 _movq(T, Src0);
2318 _movq(Dest, T);
2319 break;
2320 }
2321 // a.f64 = bitcast b.i64 ==>
2322 // t_lo.i32 = b_lo.i32
2323 // FakeDef(s.f64)
2324 // lo(s.f64) = t_lo.i32
2325 // t_hi.i32 = b_hi.i32
2326 // hi(s.f64) = t_hi.i32
2327 // a.f64 = s.f64
2218 typename Traits::SpillVariable *SpillVar = 2328 typename Traits::SpillVariable *SpillVar =
2219 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64); 2329 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2220 SpillVar->setLinkedTo(Src0Var); 2330 SpillVar->setLinkedTo(Dest);
2221 Variable *Spill = SpillVar; 2331 Variable *Spill = SpillVar;
2222 Spill->setWeight(RegWeight::Zero); 2332 Spill->setWeight(RegWeight::Zero);
2223 _movq(Spill, Src0RM); 2333
2224 SpillLo = Traits::VariableSplit::create(Func, Spill, 2334 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2225 Traits::VariableSplit::Low); 2335 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2226 SpillHi = Traits::VariableSplit::create(Func, Spill, 2336 Func, Spill, Traits::VariableSplit::Low);
2227 Traits::VariableSplit::High); 2337 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2228 } else { 2338 Func, Spill, Traits::VariableSplit::High);
2229 SpillLo = loOperand(Src0RM); 2339 _mov(T_Lo, loOperand(Src0));
2230 SpillHi = hiOperand(Src0RM); 2340 // Technically, the Spill is defined after the _store happens, but
2341 // SpillLo is considered a "use" of Spill so define Spill before it
2342 // is used.
2343 Context.insert(InstFakeDef::create(Func, Spill));
2344 _store(T_Lo, SpillLo);
2345 _mov(T_Hi, hiOperand(Src0));
2346 _store(T_Hi, SpillHi);
2347 _movq(Dest, Spill);
2231 } 2348 }
2232
2233 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2234 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2235 Variable *T_Lo = makeReg(IceType_i32);
2236 Variable *T_Hi = makeReg(IceType_i32);
2237
2238 _mov(T_Lo, SpillLo);
2239 _mov(DestLo, T_Lo);
2240 _mov(T_Hi, SpillHi);
2241 _mov(DestHi, T_Hi);
2242 } break;
2243 case IceType_f64: {
2244 Src0 = legalize(Src0);
2245 assert(Src0->getType() == IceType_i64);
2246 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2247 Variable *T = Func->makeVariable(Dest->getType());
2248 _movq(T, Src0);
2249 _movq(Dest, T);
2250 break;
2251 }
2252 // a.f64 = bitcast b.i64 ==>
2253 // t_lo.i32 = b_lo.i32
2254 // FakeDef(s.f64)
2255 // lo(s.f64) = t_lo.i32
2256 // t_hi.i32 = b_hi.i32
2257 // hi(s.f64) = t_hi.i32
2258 // a.f64 = s.f64
2259 typename Traits::SpillVariable *SpillVar =
2260 Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
2261 SpillVar->setLinkedTo(Dest);
2262 Variable *Spill = SpillVar;
2263 Spill->setWeight(RegWeight::Zero);
2264
2265 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2266 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2267 Func, Spill, Traits::VariableSplit::Low);
2268 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2269 Func, Spill, Traits::VariableSplit::High);
2270 _mov(T_Lo, loOperand(Src0));
2271 // Technically, the Spill is defined after the _store happens, but
2272 // SpillLo is considered a "use" of Spill so define Spill before it
2273 // is used.
2274 Context.insert(InstFakeDef::create(Func, Spill));
2275 _store(T_Lo, SpillLo);
2276 _mov(T_Hi, hiOperand(Src0));
2277 _store(T_Hi, SpillHi);
2278 _movq(Dest, Spill);
2279 } break; 2349 } break;
2280 case IceType_v8i1: { 2350 case IceType_v8i1: {
2281 assert(Src0->getType() == IceType_i8); 2351 assert(Src0->getType() == IceType_i8);
2282 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); 2352 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2283 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); 2353 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2284 // Arguments to functions are required to be at least 32 bits wide. 2354 // Arguments to functions are required to be at least 32 bits wide.
2285 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); 2355 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2286 Call->addArg(Src0AsI32); 2356 Call->addArg(Src0AsI32);
2287 lowerCall(Call); 2357 lowerCall(Call);
2288 } break; 2358 } break;
(...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after
2608 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2678 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2609 _pxor(T, MinusOne); 2679 _pxor(T, MinusOne);
2610 } break; 2680 } break;
2611 } 2681 }
2612 2682
2613 _movp(Dest, T); 2683 _movp(Dest, T);
2614 eliminateNextVectorSextInstruction(Dest); 2684 eliminateNextVectorSextInstruction(Dest);
2615 return; 2685 return;
2616 } 2686 }
2617 2687
2618 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2688 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
2619 if (Src0->getType() == IceType_i64) { 2689 lowerIcmp64(Inst);
2620 InstIcmp::ICond Condition = Inst->getCondition();
2621 size_t Index = static_cast<size_t>(Condition);
2622 assert(Index < Traits::TableIcmp64Size);
2623 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2624 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2625 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2626 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2627 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2628 Constant *One = Ctx->getConstantInt32(1);
2629 typename Traits::Insts::Label *LabelFalse =
2630 Traits::Insts::Label::create(Func, this);
2631 typename Traits::Insts::Label *LabelTrue =
2632 Traits::Insts::Label::create(Func, this);
2633 _mov(Dest, One);
2634 _cmp(Src0HiRM, Src1HiRI);
2635 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2636 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2637 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2638 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2639 _cmp(Src0LoRM, Src1LoRI);
2640 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2641 Context.insert(LabelFalse);
2642 _mov_nonkillable(Dest, Zero);
2643 Context.insert(LabelTrue);
2644 return; 2690 return;
2645 } 2691 }
2646 2692
2647 // cmp b, c 2693 // cmp b, c
2648 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 2694 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
2649 _cmp(Src0RM, Src1); 2695 _cmp(Src0RM, Src1);
2650 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition())); 2696 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
2651 } 2697 }
2652 2698
2699 template <typename Machine>
2700 template <typename T>
2701 typename std::enable_if<!T::Is64Bit, void>::type
2702 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
2703 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2704 Operand *Src0 = legalize(Inst->getSrc(0));
2705 Operand *Src1 = legalize(Inst->getSrc(1));
2706 Variable *Dest = Inst->getDest();
2707 InstIcmp::ICond Condition = Inst->getCondition();
2708 size_t Index = static_cast<size_t>(Condition);
2709 assert(Index < Traits::TableIcmp64Size);
2710 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2711 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2712 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2713 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2714 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2715 Constant *One = Ctx->getConstantInt32(1);
2716 typename Traits::Insts::Label *LabelFalse =
2717 Traits::Insts::Label::create(Func, this);
2718 typename Traits::Insts::Label *LabelTrue =
2719 Traits::Insts::Label::create(Func, this);
2720 _mov(Dest, One);
2721 _cmp(Src0HiRM, Src1HiRI);
2722 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
2723 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
2724 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
2725 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
2726 _cmp(Src0LoRM, Src1LoRI);
2727 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
2728 Context.insert(LabelFalse);
2729 _mov_nonkillable(Dest, Zero);
2730 Context.insert(LabelTrue);
2731 }
2732
2653 template <class Machine> 2733 template <class Machine>
2654 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 2734 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2655 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2735 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2656 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2736 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2657 ConstantInteger32 *ElementIndex = 2737 ConstantInteger32 *ElementIndex =
2658 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 2738 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2659 // Only constant indices are allowed in PNaCl IR. 2739 // Only constant indices are allowed in PNaCl IR.
2660 assert(ElementIndex); 2740 assert(ElementIndex);
2661 unsigned Index = ElementIndex->getValue(); 2741 unsigned Index = ElementIndex->getValue();
2662 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2742 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
2841 } 2921 }
2842 case Intrinsics::AtomicLoad: { 2922 case Intrinsics::AtomicLoad: {
2843 // We require the memory address to be naturally aligned. 2923 // We require the memory address to be naturally aligned.
2844 // Given that is the case, then normal loads are atomic. 2924 // Given that is the case, then normal loads are atomic.
2845 if (!Intrinsics::isMemoryOrderValid( 2925 if (!Intrinsics::isMemoryOrderValid(
2846 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 2926 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2847 Func->setError("Unexpected memory ordering for AtomicLoad"); 2927 Func->setError("Unexpected memory ordering for AtomicLoad");
2848 return; 2928 return;
2849 } 2929 }
2850 Variable *Dest = Instr->getDest(); 2930 Variable *Dest = Instr->getDest();
2851 if (Dest->getType() == IceType_i64) { 2931 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2852 // Follow what GCC does and use a movq instead of what lowerLoad() 2932 // Follow what GCC does and use a movq instead of what lowerLoad()
2853 // normally does (split the load into two). 2933 // normally does (split the load into two).
2854 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2934 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2855 // can't happen anyway, since this is x86-32 and integer arithmetic only 2935 // can't happen anyway, since this is x86-32 and integer arithmetic only
2856 // happens on 32-bit quantities. 2936 // happens on 32-bit quantities.
2857 Variable *T = makeReg(IceType_f64); 2937 Variable *T = makeReg(IceType_f64);
2858 typename Traits::X86OperandMem *Addr = 2938 typename Traits::X86OperandMem *Addr =
2859 formMemoryOperand(Instr->getArg(0), IceType_f64); 2939 formMemoryOperand(Instr->getArg(0), IceType_f64);
2860 _movq(T, Addr); 2940 _movq(T, Addr);
2861 // Then cast the bits back out of the XMM register to the i64 Dest. 2941 // Then cast the bits back out of the XMM register to the i64 Dest.
2862 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 2942 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2863 lowerCast(Cast); 2943 lowerCast(Cast);
2864 // Make sure that the atomic load isn't elided when unused. 2944 // Make sure that the atomic load isn't elided when unused.
2865 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 2945 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2866 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 2946 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2867 return; 2947 return;
2868 } 2948 }
2869 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 2949 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2870 lowerLoad(Load); 2950 lowerLoad(Load);
2871 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 2951 // Make sure the atomic load isn't elided when unused, by adding a
Jim Stichnoth 2015/08/10 19:39:19 hmm, why is a linebreak added?
John 2015/08/10 20:41:17 make format.
Jim Stichnoth 2015/08/11 16:01:36 Oh, so something like, you added an outer layer of
John 2015/08/12 19:27:55 Probably. I find annoying that clang format will b
2952 // FakeUse.
2872 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 2953 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2873 // insert the FakeUse on the last-inserted instruction's dest. 2954 // insert the FakeUse on the last-inserted instruction's dest.
2874 Context.insert( 2955 Context.insert(
2875 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 2956 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2876 return; 2957 return;
2877 } 2958 }
2878 case Intrinsics::AtomicRMW: 2959 case Intrinsics::AtomicRMW:
2879 if (!Intrinsics::isMemoryOrderValid( 2960 if (!Intrinsics::isMemoryOrderValid(
2880 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 2961 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
2881 Func->setError("Unexpected memory ordering for AtomicRMW"); 2962 Func->setError("Unexpected memory ordering for AtomicRMW");
2882 return; 2963 return;
2883 } 2964 }
2884 lowerAtomicRMW( 2965 lowerAtomicRMW(
2885 Instr->getDest(), 2966 Instr->getDest(),
2886 static_cast<uint32_t>( 2967 static_cast<uint32_t>(
2887 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 2968 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
2888 Instr->getArg(1), Instr->getArg(2)); 2969 Instr->getArg(1), Instr->getArg(2));
2889 return; 2970 return;
2890 case Intrinsics::AtomicStore: { 2971 case Intrinsics::AtomicStore: {
2891 if (!Intrinsics::isMemoryOrderValid( 2972 if (!Intrinsics::isMemoryOrderValid(
2892 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 2973 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2893 Func->setError("Unexpected memory ordering for AtomicStore"); 2974 Func->setError("Unexpected memory ordering for AtomicStore");
2894 return; 2975 return;
2895 } 2976 }
2896 // We require the memory address to be naturally aligned. 2977 // We require the memory address to be naturally aligned.
2897 // Given that is the case, then normal stores are atomic. 2978 // Given that is the case, then normal stores are atomic.
2898 // Add a fence after the store to make it visible. 2979 // Add a fence after the store to make it visible.
2899 Operand *Value = Instr->getArg(0); 2980 Operand *Value = Instr->getArg(0);
2900 Operand *Ptr = Instr->getArg(1); 2981 Operand *Ptr = Instr->getArg(1);
2901 if (Value->getType() == IceType_i64) { 2982 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
2902 // Use a movq instead of what lowerStore() normally does 2983 // Use a movq instead of what lowerStore() normally does
2903 // (split the store into two), following what GCC does. 2984 // (split the store into two), following what GCC does.
2904 // Cast the bits from int -> to an xmm register first. 2985 // Cast the bits from int -> to an xmm register first.
2905 Variable *T = makeReg(IceType_f64); 2986 Variable *T = makeReg(IceType_f64);
2906 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2907 lowerCast(Cast); 2988 lowerCast(Cast);
2908 // Then store XMM w/ a movq. 2989 // Then store XMM w/ a movq.
2909 typename Traits::X86OperandMem *Addr = 2990 typename Traits::X86OperandMem *Addr =
2910 formMemoryOperand(Ptr, IceType_f64); 2991 formMemoryOperand(Ptr, IceType_f64);
2911 _storeq(T, Addr); 2992 _storeq(T, Addr);
2912 _mfence(); 2993 _mfence();
2913 return; 2994 return;
2914 } 2995 }
2915 InstStore *Store = InstStore::create(Func, Value, Ptr); 2996 InstStore *Store = InstStore::create(Func, Value, Ptr);
2916 lowerStore(Store); 2997 lowerStore(Store);
2917 _mfence(); 2998 _mfence();
2918 return; 2999 return;
2919 } 3000 }
2920 case Intrinsics::Bswap: { 3001 case Intrinsics::Bswap: {
2921 Variable *Dest = Instr->getDest(); 3002 Variable *Dest = Instr->getDest();
2922 Operand *Val = Instr->getArg(0); 3003 Operand *Val = Instr->getArg(0);
2923 // In 32-bit mode, bswap only works on 32-bit arguments, and the 3004 // In 32-bit mode, bswap only works on 32-bit arguments, and the
2924 // argument must be a register. Use rotate left for 16-bit bswap. 3005 // argument must be a register. Use rotate left for 16-bit bswap.
2925 if (Val->getType() == IceType_i64) { 3006 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2926 Val = legalizeUndef(Val); 3007 Val = legalizeUndef(Val);
2927 Variable *T_Lo = legalizeToReg(loOperand(Val)); 3008 Variable *T_Lo = legalizeToReg(loOperand(Val));
2928 Variable *T_Hi = legalizeToReg(hiOperand(Val)); 3009 Variable *T_Hi = legalizeToReg(hiOperand(Val));
2929 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2930 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2931 _bswap(T_Lo); 3012 _bswap(T_Lo);
2932 _bswap(T_Hi); 3013 _bswap(T_Hi);
2933 _mov(DestLo, T_Hi); 3014 _mov(DestLo, T_Hi);
2934 _mov(DestHi, T_Lo); 3015 _mov(DestHi, T_Lo);
2935 } else if (Val->getType() == IceType_i32) { 3016 } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
3017 Val->getType() == IceType_i32) {
2936 Variable *T = legalizeToReg(Val); 3018 Variable *T = legalizeToReg(Val);
2937 _bswap(T); 3019 _bswap(T);
2938 _mov(Dest, T); 3020 _mov(Dest, T);
2939 } else { 3021 } else {
2940 assert(Val->getType() == IceType_i16); 3022 assert(Val->getType() == IceType_i16);
2941 Constant *Eight = Ctx->getConstantInt16(8); 3023 Constant *Eight = Ctx->getConstantInt16(8);
2942 Variable *T = nullptr; 3024 Variable *T = nullptr;
2943 Val = legalize(Val); 3025 Val = legalize(Val);
2944 _mov(T, Val); 3026 _mov(T, Val);
2945 _rol(T, Eight); 3027 _rol(T, Eight);
2946 _mov(Dest, T); 3028 _mov(Dest, T);
2947 } 3029 }
2948 return; 3030 return;
2949 } 3031 }
2950 case Intrinsics::Ctpop: { 3032 case Intrinsics::Ctpop: {
2951 Variable *Dest = Instr->getDest(); 3033 Variable *Dest = Instr->getDest();
3034 Variable *T = nullptr;
2952 Operand *Val = Instr->getArg(0); 3035 Operand *Val = Instr->getArg(0);
2953 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 3036 Type ValTy = Val->getType();
2954 ? H_call_ctpop_i32 3037 assert(ValTy == IceType_i32 || ValTy == IceType_i64);
2955 : H_call_ctpop_i64, 3038
2956 Dest, 1); 3039 if (!Traits::Is64Bit) {
3040 T = Dest;
3041 } else {
3042 T = makeReg(IceType_i64);
3043 if (ValTy == IceType_i32) {
3044 // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
3045 // converting it to a 64-bit value, and using ctpop_i64. _movzx should
3046 // ensure we will not have any bits set on Val's upper 32 bits.
3047 Variable *V = makeReg(IceType_i64);
3048 _movzx(V, Val);
3049 Val = V;
3050 }
3051 ValTy = IceType_i64;
3052 }
3053
3054 InstCall *Call = makeHelperCall(
3055 ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
2957 Call->addArg(Val); 3056 Call->addArg(Val);
2958 lowerCall(Call); 3057 lowerCall(Call);
2959 // The popcount helpers always return 32-bit values, while the intrinsic's 3058 // The popcount helpers always return 32-bit values, while the intrinsic's
2960 // signature matches the native POPCNT instruction and fills a 64-bit reg 3059 // signature matches the native POPCNT instruction and fills a 64-bit reg
2961 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case 3060 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
2962 // the user doesn't do that in the IR. If the user does that in the IR, 3061 // the user doesn't do that in the IR. If the user does that in the IR,
2963 // then this zero'ing instruction is dead and gets optimized out. 3062 // then this zero'ing instruction is dead and gets optimized out.
2964 if (Val->getType() == IceType_i64) { 3063 if (!Traits::Is64Bit) {
2965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3064 assert(T == Dest);
2966 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3065 if (Val->getType() == IceType_i64) {
2967 _mov(DestHi, Zero); 3066 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3067 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3068 _mov(DestHi, Zero);
3069 }
3070 } else {
3071 assert(Val->getType() == IceType_i64);
3072 // T is 64 bit. It needs to be copied to dest. We need to:
3073 //
3074 // T_1.32 = trunc T.64 to i32
3075 // T_2.64 = zext T_1.32 to i64
3076 // Dest.<<right_size>> = T_2.<<right_size>>
3077 //
3078 // which ensures the upper 32 bits will always be cleared. Just doing a
3079 //
 3080 // mov Dest.32 = trunc T.32 to i32
3081 //
3082 // is dangerous because there's a chance the copiler will optimize this
Jim Stichnoth 2015/08/10 19:39:20 compiler
John 2015/08/10 20:41:17 Done.
3083 // copy out. To use _movzx we need two new registers (one 32-, and
3084 // another 64-bit wide.)
3085 Variable *T_1 = makeReg(IceType_i32);
3086 _mov(T_1, T);
3087 Variable *T_2 = makeReg(IceType_i64);
3088 _movzx(T_2, T_1);
3089 _mov(Dest, T_2);
2968 } 3090 }
2969 return; 3091 return;
2970 } 3092 }
2971 case Intrinsics::Ctlz: { 3093 case Intrinsics::Ctlz: {
2972 // The "is zero undef" parameter is ignored and we always return 3094 // The "is zero undef" parameter is ignored and we always return
2973 // a well-defined value. 3095 // a well-defined value.
2974 Operand *Val = legalize(Instr->getArg(0)); 3096 Operand *Val = legalize(Instr->getArg(0));
2975 Operand *FirstVal; 3097 Operand *FirstVal;
2976 Operand *SecondVal = nullptr; 3098 Operand *SecondVal = nullptr;
2977 if (Val->getType() == IceType_i64) { 3099 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2978 FirstVal = loOperand(Val); 3100 FirstVal = loOperand(Val);
2979 SecondVal = hiOperand(Val); 3101 SecondVal = hiOperand(Val);
2980 } else { 3102 } else {
2981 FirstVal = Val; 3103 FirstVal = Val;
2982 } 3104 }
2983 const bool IsCttz = false; 3105 const bool IsCttz = false;
2984 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3106 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
2985 SecondVal); 3107 SecondVal);
2986 return; 3108 return;
2987 } 3109 }
2988 case Intrinsics::Cttz: { 3110 case Intrinsics::Cttz: {
2989 // The "is zero undef" parameter is ignored and we always return 3111 // The "is zero undef" parameter is ignored and we always return
2990 // a well-defined value. 3112 // a well-defined value.
2991 Operand *Val = legalize(Instr->getArg(0)); 3113 Operand *Val = legalize(Instr->getArg(0));
2992 Operand *FirstVal; 3114 Operand *FirstVal;
2993 Operand *SecondVal = nullptr; 3115 Operand *SecondVal = nullptr;
2994 if (Val->getType() == IceType_i64) { 3116 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2995 FirstVal = hiOperand(Val); 3117 FirstVal = hiOperand(Val);
2996 SecondVal = loOperand(Val); 3118 SecondVal = loOperand(Val);
2997 } else { 3119 } else {
2998 FirstVal = Val; 3120 FirstVal = Val;
2999 } 3121 }
3000 const bool IsCttz = true; 3122 const bool IsCttz = true;
3001 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3123 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3002 SecondVal); 3124 SecondVal);
3003 return; 3125 return;
3004 } 3126 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
3098 Func->setError("Should not be lowering UnknownIntrinsic"); 3220 Func->setError("Should not be lowering UnknownIntrinsic");
3099 return; 3221 return;
3100 } 3222 }
3101 return; 3223 return;
3102 } 3224 }
3103 3225
3104 template <class Machine> 3226 template <class Machine>
3105 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3227 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3106 Operand *Ptr, Operand *Expected, 3228 Operand *Ptr, Operand *Expected,
3107 Operand *Desired) { 3229 Operand *Desired) {
3108 if (Expected->getType() == IceType_i64) { 3230 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
3109 // Reserve the pre-colored registers first, before adding any more 3231 // Reserve the pre-colored registers first, before adding any more
3110 // infinite-weight variables from formMemoryOperand's legalization. 3232 // infinite-weight variables from formMemoryOperand's legalization.
3111 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3233 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3112 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3234 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3113 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3235 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3114 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3236 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3115 _mov(T_eax, loOperand(Expected)); 3237 _mov(T_eax, loOperand(Expected));
3116 _mov(T_edx, hiOperand(Expected)); 3238 _mov(T_edx, hiOperand(Expected));
3117 _mov(T_ebx, loOperand(Desired)); 3239 _mov(T_ebx, loOperand(Desired));
3118 _mov(T_ecx, hiOperand(Desired)); 3240 _mov(T_ecx, hiOperand(Desired));
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
3157 // [%y_phi = ...] // list of phi stores 3279 // [%y_phi = ...] // list of phi stores
3158 // br eq, %l1, %l2 3280 // br eq, %l1, %l2
3159 InstList::iterator I = Context.getCur(); 3281 InstList::iterator I = Context.getCur();
3160 // I is currently the InstIntrinsicCall. Peek past that. 3282 // I is currently the InstIntrinsicCall. Peek past that.
3161 // This assumes that the atomic cmpxchg has not been lowered yet, 3283 // This assumes that the atomic cmpxchg has not been lowered yet,
3162 // so that the instructions seen in the scan from "Cur" is simple. 3284 // so that the instructions seen in the scan from "Cur" is simple.
3163 assert(llvm::isa<InstIntrinsicCall>(*I)); 3285 assert(llvm::isa<InstIntrinsicCall>(*I));
3164 Inst *NextInst = Context.getNextInst(I); 3286 Inst *NextInst = Context.getNextInst(I);
3165 if (!NextInst) 3287 if (!NextInst)
3166 return false; 3288 return false;
3167 // There might be phi assignments right before the compare+branch, since this 3289 // There might be phi assignments right before the compare+branch, since
Jim Stichnoth 2015/08/10 19:39:20 reformat
John 2015/08/10 20:41:17 Done.
3290 // this
3168 // could be a backward branch for a loop. This placement of assignments is 3291 // could be a backward branch for a loop. This placement of assignments is
3169 // determined by placePhiStores(). 3292 // determined by placePhiStores().
3170 std::vector<InstAssign *> PhiAssigns; 3293 std::vector<InstAssign *> PhiAssigns;
3171 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) { 3294 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
3172 if (PhiAssign->getDest() == Dest) 3295 if (PhiAssign->getDest() == Dest)
3173 return false; 3296 return false;
3174 PhiAssigns.push_back(PhiAssign); 3297 PhiAssigns.push_back(PhiAssign);
3175 NextInst = Context.getNextInst(I); 3298 NextInst = Context.getNextInst(I);
3176 if (!NextInst) 3299 if (!NextInst)
3177 return false; 3300 return false;
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
3216 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3339 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
3217 Operand *Ptr, Operand *Val) { 3340 Operand *Ptr, Operand *Val) {
3218 bool NeedsCmpxchg = false; 3341 bool NeedsCmpxchg = false;
3219 LowerBinOp Op_Lo = nullptr; 3342 LowerBinOp Op_Lo = nullptr;
3220 LowerBinOp Op_Hi = nullptr; 3343 LowerBinOp Op_Hi = nullptr;
3221 switch (Operation) { 3344 switch (Operation) {
3222 default: 3345 default:
3223 Func->setError("Unknown AtomicRMW operation"); 3346 Func->setError("Unknown AtomicRMW operation");
3224 return; 3347 return;
3225 case Intrinsics::AtomicAdd: { 3348 case Intrinsics::AtomicAdd: {
3226 if (Dest->getType() == IceType_i64) { 3349 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3227 // All the fall-through paths must set this to true, but use this 3350 // All the fall-through paths must set this to true, but use this
3228 // for asserting. 3351 // for asserting.
3229 NeedsCmpxchg = true; 3352 NeedsCmpxchg = true;
3230 Op_Lo = &TargetX86Base<Machine>::_add; 3353 Op_Lo = &TargetX86Base<Machine>::_add;
3231 Op_Hi = &TargetX86Base<Machine>::_adc; 3354 Op_Hi = &TargetX86Base<Machine>::_adc;
3232 break; 3355 break;
3233 } 3356 }
3234 typename Traits::X86OperandMem *Addr = 3357 typename Traits::X86OperandMem *Addr =
3235 formMemoryOperand(Ptr, Dest->getType()); 3358 formMemoryOperand(Ptr, Dest->getType());
3236 const bool Locked = true; 3359 const bool Locked = true;
3237 Variable *T = nullptr; 3360 Variable *T = nullptr;
3238 _mov(T, Val); 3361 _mov(T, Val);
3239 _xadd(Addr, T, Locked); 3362 _xadd(Addr, T, Locked);
3240 _mov(Dest, T); 3363 _mov(Dest, T);
3241 return; 3364 return;
3242 } 3365 }
3243 case Intrinsics::AtomicSub: { 3366 case Intrinsics::AtomicSub: {
3244 if (Dest->getType() == IceType_i64) { 3367 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3245 NeedsCmpxchg = true; 3368 NeedsCmpxchg = true;
3246 Op_Lo = &TargetX86Base<Machine>::_sub; 3369 Op_Lo = &TargetX86Base<Machine>::_sub;
3247 Op_Hi = &TargetX86Base<Machine>::_sbb; 3370 Op_Hi = &TargetX86Base<Machine>::_sbb;
3248 break; 3371 break;
3249 } 3372 }
3250 typename Traits::X86OperandMem *Addr = 3373 typename Traits::X86OperandMem *Addr =
3251 formMemoryOperand(Ptr, Dest->getType()); 3374 formMemoryOperand(Ptr, Dest->getType());
3252 const bool Locked = true; 3375 const bool Locked = true;
3253 Variable *T = nullptr; 3376 Variable *T = nullptr;
3254 _mov(T, Val); 3377 _mov(T, Val);
(...skipping 16 matching lines...) Expand all
3271 NeedsCmpxchg = true; 3394 NeedsCmpxchg = true;
3272 Op_Lo = &TargetX86Base<Machine>::_and; 3395 Op_Lo = &TargetX86Base<Machine>::_and;
3273 Op_Hi = &TargetX86Base<Machine>::_and; 3396 Op_Hi = &TargetX86Base<Machine>::_and;
3274 break; 3397 break;
3275 case Intrinsics::AtomicXor: 3398 case Intrinsics::AtomicXor:
3276 NeedsCmpxchg = true; 3399 NeedsCmpxchg = true;
3277 Op_Lo = &TargetX86Base<Machine>::_xor; 3400 Op_Lo = &TargetX86Base<Machine>::_xor;
3278 Op_Hi = &TargetX86Base<Machine>::_xor; 3401 Op_Hi = &TargetX86Base<Machine>::_xor;
3279 break; 3402 break;
3280 case Intrinsics::AtomicExchange: 3403 case Intrinsics::AtomicExchange:
3281 if (Dest->getType() == IceType_i64) { 3404 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3282 NeedsCmpxchg = true; 3405 NeedsCmpxchg = true;
3283 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3406 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3284 // just need to be moved to the ecx and ebx registers. 3407 // just need to be moved to the ecx and ebx registers.
3285 Op_Lo = nullptr; 3408 Op_Lo = nullptr;
3286 Op_Hi = nullptr; 3409 Op_Hi = nullptr;
3287 break; 3410 break;
3288 } 3411 }
3289 typename Traits::X86OperandMem *Addr = 3412 typename Traits::X86OperandMem *Addr =
3290 formMemoryOperand(Ptr, Dest->getType()); 3413 formMemoryOperand(Ptr, Dest->getType());
3291 Variable *T = nullptr; 3414 Variable *T = nullptr;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
3325 // .LABEL: 3448 // .LABEL:
3326 // mov <reg>, eax 3449 // mov <reg>, eax
3327 // op <reg>, [desired_adj] 3450 // op <reg>, [desired_adj]
3328 // lock cmpxchg [ptr], <reg> 3451 // lock cmpxchg [ptr], <reg>
3329 // jne .LABEL 3452 // jne .LABEL
3330 // mov <dest>, eax 3453 // mov <dest>, eax
3331 // 3454 //
3332 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3455 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3333 Val = legalize(Val); 3456 Val = legalize(Val);
3334 Type Ty = Val->getType(); 3457 Type Ty = Val->getType();
3335 if (Ty == IceType_i64) { 3458 if (!Traits::Is64Bit && Ty == IceType_i64) {
3336 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3459 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3337 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3460 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3338 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3461 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3339 _mov(T_eax, loOperand(Addr)); 3462 _mov(T_eax, loOperand(Addr));
3340 _mov(T_edx, hiOperand(Addr)); 3463 _mov(T_edx, hiOperand(Addr));
3341 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3464 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3342 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3465 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3343 typename Traits::Insts::Label *Label = 3466 typename Traits::Insts::Label *Label =
3344 Traits::Insts::Label::create(Func, this); 3467 Traits::Insts::Label::create(Func, this);
3345 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3468 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
3426 // cmovne T_DEST, IF_NOT_ZERO 3549 // cmovne T_DEST, IF_NOT_ZERO
3427 // xor T_DEST, 31 3550 // xor T_DEST, 31
3428 // mov DEST, T_DEST 3551 // mov DEST, T_DEST
3429 // 3552 //
3430 // NOTE: T_DEST must be a register because cmov requires its dest to be a 3553 // NOTE: T_DEST must be a register because cmov requires its dest to be a
3431 // register. Also, bsf and bsr require their dest to be a register. 3554 // register. Also, bsf and bsr require their dest to be a register.
3432 // 3555 //
3433 // The xor DEST, 31 converts a bit position to # of leading zeroes. 3556 // The xor DEST, 31 converts a bit position to # of leading zeroes.
3434 // E.g., for 000... 00001100, bsr will say that the most significant bit 3557 // E.g., for 000... 00001100, bsr will say that the most significant bit
3435 // set is at position 3, while the number of leading zeros is 28. Xor is 3558 // set is at position 3, while the number of leading zeros is 28. Xor is
3436 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case). 3559 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros
Jim Stichnoth 2015/08/10 19:39:20 reformat?
John 2015/08/10 20:41:17 Done.
3560 // case).
3437 // 3561 //
3438 // Similar for 64-bit, but start w/ speculating that the upper 32 bits 3562 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3439 // are all zero, and compute the result for that case (checking the lower 3563 // are all zero, and compute the result for that case (checking the lower
3440 // 32 bits). Then actually compute the result for the upper bits and 3564 // 32 bits). Then actually compute the result for the upper bits and
3441 // cmov in the result from the lower computation if the earlier speculation 3565 // cmov in the result from the lower computation if the earlier speculation
3442 // was correct. 3566 // was correct.
3443 // 3567 //
3444 // Cttz, is similar, but uses bsf instead, and doesn't require the xor 3568 // Cttz, is similar, but uses bsf instead, and doesn't require the xor
3445 // bit position conversion, and the speculation is reversed. 3569 // bit position conversion, and the speculation is reversed.
3446 assert(Ty == IceType_i32 || Ty == IceType_i64); 3570 assert(Ty == IceType_i32 || Ty == IceType_i64);
(...skipping 10 matching lines...) Expand all
3457 if (Cttz) { 3581 if (Cttz) {
3458 _mov(T_Dest, ThirtyTwo); 3582 _mov(T_Dest, ThirtyTwo);
3459 } else { 3583 } else {
3460 Constant *SixtyThree = Ctx->getConstantInt32(63); 3584 Constant *SixtyThree = Ctx->getConstantInt32(63);
3461 _mov(T_Dest, SixtyThree); 3585 _mov(T_Dest, SixtyThree);
3462 } 3586 }
3463 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3587 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3464 if (!Cttz) { 3588 if (!Cttz) {
3465 _xor(T_Dest, ThirtyOne); 3589 _xor(T_Dest, ThirtyOne);
3466 } 3590 }
3467 if (Ty == IceType_i32) { 3591 if (Traits::Is64Bit || Ty == IceType_i32) {
3468 _mov(Dest, T_Dest); 3592 _mov(Dest, T_Dest);
3469 return; 3593 return;
3470 } 3594 }
3471 _add(T_Dest, ThirtyTwo); 3595 _add(T_Dest, ThirtyTwo);
3472 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3596 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3473 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3597 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3474 // Will be using "test" on this, so we need a registerized variable. 3598 // Will be using "test" on this, so we need a registerized variable.
3475 Variable *SecondVar = legalizeToReg(SecondVal); 3599 Variable *SecondVar = legalizeToReg(SecondVal);
3476 Variable *T_Dest2 = makeReg(IceType_i32); 3600 Variable *T_Dest2 = makeReg(IceType_i32);
3477 if (Cttz) { 3601 if (Cttz) {
(...skipping 19 matching lines...) Expand all
3497 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); 3621 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
3498 const bool IsCountConst = CountConst != nullptr; 3622 const bool IsCountConst = CountConst != nullptr;
3499 const bool IsValConst = ValConst != nullptr; 3623 const bool IsValConst = ValConst != nullptr;
3500 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; 3624 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3501 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0; 3625 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
3502 3626
3503 // Unlikely, but nothing to do if it does happen 3627 // Unlikely, but nothing to do if it does happen
3504 if (IsCountConst && CountValue == 0) 3628 if (IsCountConst && CountValue == 0)
3505 return; 3629 return;
3506 3630
3507 // TODO(ascull): if the count is constant but val is not it would be possible 3631 // TODO(ascull): if the count is constant but val is not it would be
3508 // to inline by spreading the value across 4 bytes and accessing subregs e.g. 3632 // possible
Jim Stichnoth 2015/08/10 19:39:20 More weird linebreak stuff
John 2015/08/10 20:41:17 Done.
3633 // to inline by spreading the value across 4 bytes and accessing subregs
3634 // e.g.
3509 // eax, ax and al. 3635 // eax, ax and al.
3510 if (IsCountConst && IsValConst) { 3636 if (IsCountConst && IsValConst) {
3511 Variable *Base = legalizeToReg(Dest); 3637 Variable *Base = legalizeToReg(Dest);
3512 // Add a FakeUse in case Base is ultimately not used, e.g. it falls back to 3638 // Add a FakeUse in case Base is ultimately not used, e.g. it falls back
3639 // to
3513 // calling memset(). Otherwise Om1 register allocation fails because this 3640 // calling memset(). Otherwise Om1 register allocation fails because this
3514 // infinite-weight variable has a definition but no uses. 3641 // infinite-weight variable has a definition but no uses.
3515 Context.insert(InstFakeUse::create(Func, Base)); 3642 Context.insert(InstFakeUse::create(Func, Base));
3516 3643
3517 // 3 is the awkward size as it is too small for the vector or 32-bit 3644 // 3 is the awkward size as it is too small for the vector or 32-bit
3518 // operations and will not work with lowerLeftOvers as there is no valid 3645 // operations and will not work with lowerLeftOvers as there is no valid
3519 // overlap. 3646 // overlap.
3520 if (CountValue == 3) { 3647 if (CountValue == 3) {
3521 Constant *Offset = nullptr; 3648 Constant *Offset = nullptr;
3522 auto *Mem = 3649 auto *Mem =
(...skipping 361 matching lines...) Expand 10 before | Expand all | Expand 10 after
3884 // Index is Index=Var-Const ==> 4011 // Index is Index=Var-Const ==>
3885 // set Index=Var, Offset-=(Const<<Shift) 4012 // set Index=Var, Offset-=(Const<<Shift)
3886 4013
3887 // TODO: consider overflow issues with respect to Offset. 4014 // TODO: consider overflow issues with respect to Offset.
3888 // TODO: handle symbolic constants. 4015 // TODO: handle symbolic constants.
3889 } 4016 }
3890 } 4017 }
3891 4018
3892 template <class Machine> 4019 template <class Machine>
3893 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4020 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
3894 // A Load instruction can be treated the same as an Assign instruction, after 4021 // A Load instruction can be treated the same as an Assign instruction,
4022 // after
3895 // the source operand is transformed into an Traits::X86OperandMem operand. 4023 // the source operand is transformed into an Traits::X86OperandMem operand.
3896 // Note that the address mode optimization already creates an 4024 // Note that the address mode optimization already creates an
3897 // Traits::X86OperandMem operand, so it doesn't need another level of 4025 // Traits::X86OperandMem operand, so it doesn't need another level of
3898 // transformation. 4026 // transformation.
3899 Variable *DestLoad = Load->getDest(); 4027 Variable *DestLoad = Load->getDest();
3900 Type Ty = DestLoad->getType(); 4028 Type Ty = DestLoad->getType();
3901 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4029 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3902 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4030 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3903 lowerAssign(Assign); 4031 lowerAssign(Assign);
3904 } 4032 }
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4046 return; 4174 return;
4047 } 4175 }
4048 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4176 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4049 // But if SrcT is immediate, we might be able to do better, as 4177 // But if SrcT is immediate, we might be able to do better, as
4050 // the cmov instruction doesn't allow an immediate operand: 4178 // the cmov instruction doesn't allow an immediate operand:
4051 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4179 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4052 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4180 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4053 std::swap(SrcT, SrcF); 4181 std::swap(SrcT, SrcF);
4054 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4182 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4055 } 4183 }
4056 if (DestTy == IceType_i64) { 4184 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4057 SrcT = legalizeUndef(SrcT); 4185 SrcT = legalizeUndef(SrcT);
4058 SrcF = legalizeUndef(SrcF); 4186 SrcF = legalizeUndef(SrcF);
4059 // Set the low portion. 4187 // Set the low portion.
4060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4188 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4061 Variable *TLo = nullptr; 4189 Variable *TLo = nullptr;
4062 Operand *SrcFLo = legalize(loOperand(SrcF)); 4190 Operand *SrcFLo = legalize(loOperand(SrcF));
4063 _mov(TLo, SrcFLo); 4191 _mov(TLo, SrcFLo);
4064 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4192 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4065 _cmov(TLo, SrcTLo, Cond); 4193 _cmov(TLo, SrcTLo, Cond);
4066 _mov(DestLo, TLo); 4194 _mov(DestLo, TLo);
4067 // Set the high portion. 4195 // Set the high portion.
4068 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4196 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4069 Variable *THi = nullptr; 4197 Variable *THi = nullptr;
4070 Operand *SrcFHi = legalize(hiOperand(SrcF)); 4198 Operand *SrcFHi = legalize(hiOperand(SrcF));
4071 _mov(THi, SrcFHi); 4199 _mov(THi, SrcFHi);
4072 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); 4200 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
4073 _cmov(THi, SrcTHi, Cond); 4201 _cmov(THi, SrcTHi, Cond);
4074 _mov(DestHi, THi); 4202 _mov(DestHi, THi);
4075 return; 4203 return;
4076 } 4204 }
4077 4205
4078 assert(DestTy == IceType_i16 || DestTy == IceType_i32); 4206 assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
4207 (Traits::Is64Bit && DestTy == IceType_i64));
4079 Variable *T = nullptr; 4208 Variable *T = nullptr;
4080 SrcF = legalize(SrcF); 4209 SrcF = legalize(SrcF);
4081 _mov(T, SrcF); 4210 _mov(T, SrcF);
4082 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4211 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4083 _cmov(T, SrcT, Cond); 4212 _cmov(T, SrcT, Cond);
4084 _mov(Dest, T); 4213 _mov(Dest, T);
4085 } 4214 }
4086 4215
4087 template <class Machine> 4216 template <class Machine>
4088 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4217 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4089 Operand *Value = Inst->getData(); 4218 Operand *Value = Inst->getData();
4090 Operand *Addr = Inst->getAddr(); 4219 Operand *Addr = Inst->getAddr();
4091 typename Traits::X86OperandMem *NewAddr = 4220 typename Traits::X86OperandMem *NewAddr =
4092 formMemoryOperand(Addr, Value->getType()); 4221 formMemoryOperand(Addr, Value->getType());
4093 Type Ty = NewAddr->getType(); 4222 Type Ty = NewAddr->getType();
4094 4223
4095 if (Ty == IceType_i64) { 4224 if (!Traits::Is64Bit && Ty == IceType_i64) {
4096 Value = legalizeUndef(Value); 4225 Value = legalizeUndef(Value);
4097 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4226 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4098 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4227 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4099 _store(ValueHi, 4228 _store(ValueHi,
4100 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr))); 4229 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4101 _store(ValueLo, 4230 _store(ValueLo,
4102 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr))); 4231 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4103 } else if (isVectorType(Ty)) { 4232 } else if (isVectorType(Ty)) {
4104 _storep(legalizeToReg(Value), NewAddr); 4233 _storep(legalizeToReg(Value), NewAddr);
4105 } else { 4234 } else {
(...skipping 27 matching lines...) Expand all
4133 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4262 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4134 Context.insert(NewStore); 4263 Context.insert(NewStore);
4135 } 4264 }
4136 } 4265 }
4137 4266
4138 template <class Machine> 4267 template <class Machine>
4139 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison, 4268 Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
4140 uint64_t Min, uint64_t Max) { 4269 uint64_t Min, uint64_t Max) {
4141 // TODO(ascull): 64-bit should not reach here but only because it is not 4270 // TODO(ascull): 64-bit should not reach here but only because it is not
4142 // implemented yet. This should be able to handle the 64-bit case. 4271 // implemented yet. This should be able to handle the 64-bit case.
4143 assert(Comparison->getType() != IceType_i64); 4272 assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
4144 // Subtracting 0 is a nop so don't do it 4273 // Subtracting 0 is a nop so don't do it
4145 if (Min != 0) { 4274 if (Min != 0) {
4146 // Avoid clobbering the comparison by copying it 4275 // Avoid clobbering the comparison by copying it
4147 Variable *T = nullptr; 4276 Variable *T = nullptr;
4148 _mov(T, Comparison); 4277 _mov(T, Comparison);
4149 _sub(T, Ctx->getConstantInt32(Min)); 4278 _sub(T, Ctx->getConstantInt32(Min));
4150 Comparison = T; 4279 Comparison = T;
4151 } 4280 }
4152 4281
4153 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); 4282 _cmp(Comparison, Ctx->getConstantInt32(Max - Min));
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
4232 4361
4233 template <class Machine> 4362 template <class Machine>
4234 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4363 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4235 // Group cases together and navigate through them with a binary search 4364 // Group cases together and navigate through them with a binary search
4236 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); 4365 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst);
4237 Operand *Src0 = Inst->getComparison(); 4366 Operand *Src0 = Inst->getComparison();
4238 CfgNode *DefaultTarget = Inst->getLabelDefault(); 4367 CfgNode *DefaultTarget = Inst->getLabelDefault();
4239 4368
4240 assert(CaseClusters.size() != 0); // Should always be at least one 4369 assert(CaseClusters.size() != 0); // Should always be at least one
4241 4370
4242 if (Src0->getType() == IceType_i64) { 4371 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
4243 Src0 = legalize(Src0); // get Base/Index into physical registers 4372 Src0 = legalize(Src0); // get Base/Index into physical registers
4244 Operand *Src0Lo = loOperand(Src0); 4373 Operand *Src0Lo = loOperand(Src0);
4245 Operand *Src0Hi = hiOperand(Src0); 4374 Operand *Src0Hi = hiOperand(Src0);
4246 if (CaseClusters.back().getHigh() > UINT32_MAX) { 4375 if (CaseClusters.back().getHigh() > UINT32_MAX) {
4247 // TODO(ascull): handle 64-bit case properly (currently naive version) 4376 // TODO(ascull): handle 64-bit case properly (currently naive version)
4248 // This might be handled by a higher level lowering of switches. 4377 // This might be handled by a higher level lowering of switches.
4249 SizeT NumCases = Inst->getNumCases(); 4378 SizeT NumCases = Inst->getNumCases();
4250 if (NumCases >= 2) { 4379 if (NumCases >= 2) {
4251 Src0Lo = legalizeToReg(Src0Lo); 4380 Src0Lo = legalizeToReg(Src0Lo);
4252 Src0Hi = legalizeToReg(Src0Hi); 4381 Src0Hi = legalizeToReg(Src0Hi);
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after
4437 // that follows. This means that the original Store instruction is 4566 // that follows. This means that the original Store instruction is
4438 // still there, either because the value being stored is used beyond 4567 // still there, either because the value being stored is used beyond
4439 // the Store instruction, or because dead code elimination did not 4568 // the Store instruction, or because dead code elimination did not
4440 // happen. In either case, we cancel RMW lowering (and the caller 4569 // happen. In either case, we cancel RMW lowering (and the caller
4441 // deletes the RMW instruction). 4570 // deletes the RMW instruction).
4442 if (!RMW->isLastUse(RMW->getBeacon())) 4571 if (!RMW->isLastUse(RMW->getBeacon()))
4443 return; 4572 return;
4444 Operand *Src = RMW->getData(); 4573 Operand *Src = RMW->getData();
4445 Type Ty = Src->getType(); 4574 Type Ty = Src->getType();
4446 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4575 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4447 if (Ty == IceType_i64) { 4576 if (!Traits::Is64Bit && Ty == IceType_i64) {
4448 Src = legalizeUndef(Src); 4577 Src = legalizeUndef(Src);
4449 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4578 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4450 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4579 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4451 typename Traits::X86OperandMem *AddrLo = 4580 typename Traits::X86OperandMem *AddrLo =
4452 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr)); 4581 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4453 typename Traits::X86OperandMem *AddrHi = 4582 typename Traits::X86OperandMem *AddrHi =
4454 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr)); 4583 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4455 switch (RMW->getOp()) { 4584 switch (RMW->getOp()) {
4456 default: 4585 default:
4457 // TODO(stichnot): Implement other arithmetic operators. 4586 // TODO(stichnot): Implement other arithmetic operators.
(...skipping 13 matching lines...) Expand all
4471 case InstArithmetic::Or: 4600 case InstArithmetic::Or:
4472 _or_rmw(AddrLo, SrcLo); 4601 _or_rmw(AddrLo, SrcLo);
4473 _or_rmw(AddrHi, SrcHi); 4602 _or_rmw(AddrHi, SrcHi);
4474 return; 4603 return;
4475 case InstArithmetic::Xor: 4604 case InstArithmetic::Xor:
4476 _xor_rmw(AddrLo, SrcLo); 4605 _xor_rmw(AddrLo, SrcLo);
4477 _xor_rmw(AddrHi, SrcHi); 4606 _xor_rmw(AddrHi, SrcHi);
4478 return; 4607 return;
4479 } 4608 }
4480 } else { 4609 } else {
4481 // i8, i16, i32 4610 // x86-32: i8, i16, i32
4611 // x86-64: i8, i16, i32, i64
4482 switch (RMW->getOp()) { 4612 switch (RMW->getOp()) {
4483 default: 4613 default:
4484 // TODO(stichnot): Implement other arithmetic operators. 4614 // TODO(stichnot): Implement other arithmetic operators.
4485 break; 4615 break;
4486 case InstArithmetic::Add: 4616 case InstArithmetic::Add:
4487 Src = legalize(Src, Legal_Reg | Legal_Imm); 4617 Src = legalize(Src, Legal_Reg | Legal_Imm);
4488 _add_rmw(Addr, Src); 4618 _add_rmw(Addr, Src);
4489 return; 4619 return;
4490 case InstArithmetic::Sub: 4620 case InstArithmetic::Sub:
4491 Src = legalize(Src, Legal_Reg | Legal_Imm); 4621 Src = legalize(Src, Legal_Reg | Legal_Imm);
(...skipping 24 matching lines...) Expand all
4516 } else { 4646 } else {
4517 TargetLowering::lowerOther(Instr); 4647 TargetLowering::lowerOther(Instr);
4518 } 4648 }
4519 } 4649 }
4520 4650
4521 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4651 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4522 /// preserve integrity of liveness analysis. Undef values are also 4652 /// preserve integrity of liveness analysis. Undef values are also
4523 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4653 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4524 /// Undef input. 4654 /// Undef input.
4525 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4655 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4526 // Pause constant blinding or pooling, blinding or pooling will be done later 4656 if (Traits::Is64Bit) {
4657 // On x86-64 we don't need to prelower phis -- the architecture can handle
4658 // 64-bit integer natively.
4659 return;
4660 }
4661
4662 // Pause constant blinding or pooling, blinding or pooling will be done
4663 // later
4527 // during phi lowering assignments 4664 // during phi lowering assignments
4528 BoolFlagSaver B(RandomizationPoolingPaused, true); 4665 BoolFlagSaver B(RandomizationPoolingPaused, true);
4529 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4666 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4530 this, Context.getNode(), Func); 4667 this, Context.getNode(), Func);
4531 } 4668 }
4532 4669
4533 // There is no support for loading or emitting vector constants, so the 4670 // There is no support for loading or emitting vector constants, so the
4534 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4671 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
4535 // etc. are initialized with register operations. 4672 // etc. are initialized with register operations.
4536 // 4673 //
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
4678 if (auto *Const = llvm::dyn_cast<Constant>(From)) { 4815 if (auto *Const = llvm::dyn_cast<Constant>(From)) {
4679 if (llvm::isa<ConstantUndef>(Const)) { 4816 if (llvm::isa<ConstantUndef>(Const)) {
4680 From = legalizeUndef(Const, RegNum); 4817 From = legalizeUndef(Const, RegNum);
4681 if (isVectorType(Ty)) 4818 if (isVectorType(Ty))
4682 return From; 4819 return From;
4683 Const = llvm::cast<Constant>(From); 4820 Const = llvm::cast<Constant>(From);
4684 } 4821 }
4685 // There should be no constants of vector type (other than undef). 4822 // There should be no constants of vector type (other than undef).
4686 assert(!isVectorType(Ty)); 4823 assert(!isVectorType(Ty));
4687 4824
4825 // If the operand is a 64 bit constant integer we need to legalize it to a
4826 // register in x86-64.
4827 if (Traits::Is64Bit) {
4828 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Const)) {
Jim Stichnoth 2015/08/10 19:39:20 Use isa<> instead of dyn_cast<>.
John 2015/08/10 20:41:17 Is there any rule for isa v. dyn_cast? In this case
Jim Stichnoth 2015/08/11 16:01:36 You're probably right about equivalent code generation
John 2015/08/12 19:27:54 Fair enough. Done.
4829 Variable *V = copyToReg(C, RegNum);
4830 V->setWeightInfinite();
4831 return V;
4832 }
4833 }
4834
4688 // If the operand is an 32 bit constant integer, we should check 4835 // If the operand is an 32 bit constant integer, we should check
4689 // whether we need to randomize it or pool it. 4836 // whether we need to randomize it or pool it.
4690 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { 4837 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
4691 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); 4838 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
4692 if (NewConst != Const) { 4839 if (NewConst != Const) {
4693 return NewConst; 4840 return NewConst;
4694 } 4841 }
4695 } 4842 }
4696 4843
4697 // Convert a scalar floating point constant into an explicit 4844 // Convert a scalar floating point constant into an explicit
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
4815 } 4962 }
4816 // Do legalization, which contains randomization/pooling 4963 // Do legalization, which contains randomization/pooling
4817 // or do randomization/pooling. 4964 // or do randomization/pooling.
4818 return llvm::cast<typename Traits::X86OperandMem>( 4965 return llvm::cast<typename Traits::X86OperandMem>(
4819 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 4966 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
4820 } 4967 }
4821 4968
4822 template <class Machine> 4969 template <class Machine>
4823 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 4970 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
4824 // There aren't any 64-bit integer registers for x86-32. 4971 // There aren't any 64-bit integer registers for x86-32.
4825 assert(Type != IceType_i64); 4972 assert(Traits::Is64Bit || Type != IceType_i64);
4826 Variable *Reg = Func->makeVariable(Type); 4973 Variable *Reg = Func->makeVariable(Type);
4827 if (RegNum == Variable::NoRegister) 4974 if (RegNum == Variable::NoRegister)
4828 Reg->setWeightInfinite(); 4975 Reg->setWeightInfinite();
4829 else 4976 else
4830 Reg->setRegNum(RegNum); 4977 Reg->setRegNum(RegNum);
4831 return Reg; 4978 return Reg;
4832 } 4979 }
4833 4980
4834 template <class Machine> void TargetX86Base<Machine>::postLower() { 4981 template <class Machine> void TargetX86Base<Machine>::postLower() {
4835 if (Ctx->getFlags().getOptLevel() == Opt_m1) 4982 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 11 matching lines...) Expand all
4847 4994
4848 template <class Machine> 4995 template <class Machine>
4849 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 4996 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
4850 if (!BuildDefs::dump()) 4997 if (!BuildDefs::dump())
4851 return; 4998 return;
4852 Ostream &Str = Ctx->getStrEmit(); 4999 Ostream &Str = Ctx->getStrEmit();
4853 Str << getConstantPrefix() << C->getValue(); 5000 Str << getConstantPrefix() << C->getValue();
4854 } 5001 }
4855 5002
4856 template <class Machine> 5003 template <class Machine>
4857 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const { 5004 void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
4858 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); 5005 if (!Traits::Is64Bit) {
5006 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
5007 } else {
5008 if (!BuildDefs::dump())
5009 return;
5010 Ostream &Str = Ctx->getStrEmit();
5011 Str << getConstantPrefix() << C->getValue();
5012 }
4859 } 5013 }
4860 5014
4861 template <class Machine> 5015 template <class Machine>
4862 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const { 5016 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
4863 if (!BuildDefs::dump()) 5017 if (!BuildDefs::dump())
4864 return; 5018 return;
4865 Ostream &Str = Ctx->getStrEmit(); 5019 Ostream &Str = Ctx->getStrEmit();
4866 C->emitPoolLabel(Str); 5020 C->emitPoolLabel(Str);
4867 } 5021 }
4868 5022
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4993 Constant *Mask1 = Ctx->getConstantInt( 5147 Constant *Mask1 = Ctx->getConstantInt(
4994 MemOperand->getOffset()->getType(), Cookie + Value); 5148 MemOperand->getOffset()->getType(), Cookie + Value);
4995 Constant *Mask2 = 5149 Constant *Mask2 =
4996 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5150 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
4997 5151
4998 typename Traits::X86OperandMem *TempMemOperand = 5152 typename Traits::X86OperandMem *TempMemOperand =
4999 Traits::X86OperandMem::create(Func, MemOperand->getType(), 5153 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5000 MemOperand->getBase(), Mask1); 5154 MemOperand->getBase(), Mask1);
5001 // If we have already assigned a physical register, we must come from 5155 // If we have already assigned a physical register, we must come from
5002 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5156 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
 5003 // the assigned register as this assignment is the start of its use-def 5157 // the assigned register as this assignment is the start of its
 5158 // use-def
5004 // chain. So we add RegNum argument here. 5159 // chain. So we add RegNum argument here.
5005 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5160 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5006 _lea(RegTemp, TempMemOperand); 5161 _lea(RegTemp, TempMemOperand);
5007 // As source operand doesn't use the dstreg, we don't need to add 5162 // As source operand doesn't use the dstreg, we don't need to add
5008 // _set_dest_nonkillable(). 5163 // _set_dest_nonkillable().
5009 // But if we use the same Dest Reg, that is, with RegNum 5164 // But if we use the same Dest Reg, that is, with RegNum
5010 // assigned, we should add this _set_dest_nonkillable() 5165 // assigned, we should add this _set_dest_nonkillable()
5011 if (RegNum != Variable::NoRegister) 5166 if (RegNum != Variable::NoRegister)
5012 _set_dest_nonkillable(); 5167 _set_dest_nonkillable();
5013 5168
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
5077 } 5232 }
5078 // the offset is not eligible for blinding or pooling, return the original 5233 // the offset is not eligible for blinding or pooling, return the original
5079 // mem operand 5234 // mem operand
5080 return MemOperand; 5235 return MemOperand;
5081 } 5236 }
5082 5237
5083 } // end of namespace X86Internal 5238 } // end of namespace X86Internal
5084 } // end of namespace Ice 5239 } // end of namespace Ice
5085 5240
5086 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5241 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698