| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX86Base class, which | 10 // This file implements the TargetLoweringX86Base class, which |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 // optimization because it minimizes branches. | 170 // optimization because it minimizes branches. |
| 171 template <class MachineTraits> | 171 template <class MachineTraits> |
| 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| 173 switch (getProducerKind(Instr)) { | 173 switch (getProducerKind(Instr)) { |
| 174 default: | 174 default: |
| 175 return false; | 175 return false; |
| 176 case PK_Icmp64: | 176 case PK_Icmp64: |
| 177 return true; | 177 return true; |
| 178 case PK_Fcmp: | 178 case PK_Fcmp: |
| 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 180 .C2 != CondX86::Br_None; | 180 .C2 != MachineTraits::Cond::Br_None; |
| 181 } | 181 } |
| 182 } | 182 } |
| 183 | 183 |
| 184 template <class MachineTraits> | 184 template <class MachineTraits> |
| 185 void BoolFolding<MachineTraits>::init(CfgNode *Node) { | 185 void BoolFolding<MachineTraits>::init(CfgNode *Node) { |
| 186 Producers.clear(); | 186 Producers.clear(); |
| 187 for (Inst &Instr : Node->getInsts()) { | 187 for (Inst &Instr : Node->getInsts()) { |
| 188 // Check whether Instr is a valid producer. | 188 // Check whether Instr is a valid producer. |
| 189 Variable *Var = Instr.getDest(); | 189 Variable *Var = Instr.getDest(); |
| 190 if (!Instr.isDeleted() // only consider non-deleted instructions | 190 if (!Instr.isDeleted() // only consider non-deleted instructions |
| (...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 278 if (Func->getContext()->getFlags().getTargetInstructionSet() != | 278 if (Func->getContext()->getFlags().getTargetInstructionSet() != |
| 279 TargetInstructionSet::BaseInstructionSet) { | 279 TargetInstructionSet::BaseInstructionSet) { |
| 280 InstructionSet = static_cast<typename Traits::InstructionSet>( | 280 InstructionSet = static_cast<typename Traits::InstructionSet>( |
| 281 (Func->getContext()->getFlags().getTargetInstructionSet() - | 281 (Func->getContext()->getFlags().getTargetInstructionSet() - |
| 282 TargetInstructionSet::X86InstructionSet_Begin) + | 282 TargetInstructionSet::X86InstructionSet_Begin) + |
| 283 Traits::InstructionSet::Begin); | 283 Traits::InstructionSet::Begin); |
| 284 } | 284 } |
| 285 // TODO: Don't initialize IntegerRegisters and friends every time. | 285 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 286 // Instead, initialize in some sort of static initializer for the | 286 // Instead, initialize in some sort of static initializer for the |
| 287 // class. | 287 // class. |
| 288 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | 288 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM); |
| 289 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | 289 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM); |
| 290 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | 290 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM); |
| 291 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | 291 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM); |
| 292 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | 292 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM); |
| 293 ScratchRegs.resize(RegX8632::Reg_NUM); | 293 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM); |
| 294 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 294 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 295 frameptr, isI8, isInt, isFP) \ | 295 frameptr, isI8, isInt, isFP) \ |
| 296 IntegerRegisters[RegX8632::val] = isInt; \ | 296 IntegerRegisters[Traits::RegisterSet::val] = isInt; \ |
| 297 IntegerRegistersI8[RegX8632::val] = isI8; \ | 297 IntegerRegistersI8[Traits::RegisterSet::val] = isI8; \ |
| 298 FloatRegisters[RegX8632::val] = isFP; \ | 298 FloatRegisters[Traits::RegisterSet::val] = isFP; \ |
| 299 VectorRegisters[RegX8632::val] = isFP; \ | 299 VectorRegisters[Traits::RegisterSet::val] = isFP; \ |
| 300 ScratchRegs[RegX8632::val] = scratch; | 300 ScratchRegs[Traits::RegisterSet::val] = scratch; |
| 301 REGX8632_TABLE; | 301 REGX8632_TABLE; |
| 302 #undef X | 302 #undef X |
| 303 TypeToRegisterSet[IceType_void] = InvalidRegisters; | 303 TypeToRegisterSet[IceType_void] = InvalidRegisters; |
| 304 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; | 304 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; |
| 305 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; | 305 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; |
| 306 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 306 TypeToRegisterSet[IceType_i16] = IntegerRegisters; |
| 307 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 307 TypeToRegisterSet[IceType_i32] = IntegerRegisters; |
| 308 TypeToRegisterSet[IceType_i64] = IntegerRegisters; | 308 TypeToRegisterSet[IceType_i64] = IntegerRegisters; |
| 309 TypeToRegisterSet[IceType_f32] = FloatRegisters; | 309 TypeToRegisterSet[IceType_f32] = FloatRegisters; |
| 310 TypeToRegisterSet[IceType_f64] = FloatRegisters; | 310 TypeToRegisterSet[IceType_f64] = FloatRegisters; |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 733 name, | 733 name, |
| 734 REGX8632_TABLE | 734 REGX8632_TABLE |
| 735 #undef X | 735 #undef X |
| 736 }; | 736 }; |
| 737 | 737 |
| 738 template <class Machine> | 738 template <class Machine> |
| 739 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 739 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| 740 if (Ty == IceType_void) | 740 if (Ty == IceType_void) |
| 741 Ty = IceType_i32; | 741 Ty = IceType_i32; |
| 742 if (PhysicalRegisters[Ty].empty()) | 742 if (PhysicalRegisters[Ty].empty()) |
| 743 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); | 743 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
| 744 assert(RegNum < PhysicalRegisters[Ty].size()); | 744 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 745 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 745 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 746 if (Reg == nullptr) { | 746 if (Reg == nullptr) { |
| 747 Reg = Func->template makeVariable(Ty); | 747 Reg = Func->template makeVariable(Ty); |
| 748 Reg->setRegNum(RegNum); | 748 Reg->setRegNum(RegNum); |
| 749 PhysicalRegisters[Ty][RegNum] = Reg; | 749 PhysicalRegisters[Ty][RegNum] = Reg; |
| 750 // Specially mark esp as an "argument" so that it is considered | 750 // Specially mark esp as an "argument" so that it is considered |
| 751 // live upon function entry. | 751 // live upon function entry. |
| 752 if (RegNum == RegX8632::Reg_esp) { | 752 if (RegNum == Traits::RegisterSet::Reg_esp) { |
| 753 Func->addImplicitArg(Reg); | 753 Func->addImplicitArg(Reg); |
| 754 Reg->setIgnoreLiveness(); | 754 Reg->setIgnoreLiveness(); |
| 755 } | 755 } |
| 756 } | 756 } |
| 757 return Reg; | 757 return Reg; |
| 758 } | 758 } |
| 759 | 759 |
| 760 template <class Machine> | 760 template <class Machine> |
| 761 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 761 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { |
| 762 assert(RegNum < RegX8632::Reg_NUM); | 762 assert(RegNum < Traits::RegisterSet::Reg_NUM); |
| 763 static IceString RegNames8[] = { | 763 static IceString RegNames8[] = { |
| 764 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 764 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 765 frameptr, isI8, isInt, isFP) \ | 765 frameptr, isI8, isInt, isFP) \ |
| 766 name8, | 766 name8, |
| 767 REGX8632_TABLE | 767 REGX8632_TABLE |
| 768 #undef X | 768 #undef X |
| 769 }; | 769 }; |
| 770 static IceString RegNames16[] = { | 770 static IceString RegNames16[] = { |
| 771 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 771 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 772 frameptr, isI8, isInt, isFP) \ | 772 frameptr, isI8, isInt, isFP) \ |
| (...skipping 25 matching lines...) Expand all Loading... |
| 798 int32_t Offset = Var->getStackOffset(); | 798 int32_t Offset = Var->getStackOffset(); |
| 799 if (!hasFramePointer()) | 799 if (!hasFramePointer()) |
| 800 Offset += getStackAdjustment(); | 800 Offset += getStackAdjustment(); |
| 801 if (Offset) | 801 if (Offset) |
| 802 Str << Offset; | 802 Str << Offset; |
| 803 const Type FrameSPTy = IceType_i32; | 803 const Type FrameSPTy = IceType_i32; |
| 804 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; | 804 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; |
| 805 } | 805 } |
| 806 | 806 |
| 807 template <class Machine> | 807 template <class Machine> |
| 808 X8632::Address | 808 typename TargetX86Base<Machine>::Traits::Address |
| 809 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { | 809 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { |
| 810 if (Var->hasReg()) | 810 if (Var->hasReg()) |
| 811 llvm_unreachable("Stack Variable has a register assigned"); | 811 llvm_unreachable("Stack Variable has a register assigned"); |
| 812 if (Var->getWeight().isInf()) { | 812 if (Var->getWeight().isInf()) { |
| 813 llvm_unreachable("Infinite-weight Variable has no register assigned"); | 813 llvm_unreachable("Infinite-weight Variable has no register assigned"); |
| 814 } | 814 } |
| 815 int32_t Offset = Var->getStackOffset(); | 815 int32_t Offset = Var->getStackOffset(); |
| 816 if (!hasFramePointer()) | 816 if (!hasFramePointer()) |
| 817 Offset += getStackAdjustment(); | 817 Offset += getStackAdjustment(); |
| 818 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); | 818 return typename Traits::Address( |
| 819 Traits::RegisterSet::getEncodedGPR(getFrameOrStackReg()), Offset); |
| 819 } | 820 } |
| 820 | 821 |
| 821 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { | 822 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { |
| 822 VarList &Args = Func->getArgs(); | 823 VarList &Args = Func->getArgs(); |
| 823 // The first four arguments of vector type, regardless of their | 824 // The first four arguments of vector type, regardless of their |
| 824 // position relative to the other arguments in the argument list, are | 825 // position relative to the other arguments in the argument list, are |
| 825 // passed in registers xmm0 - xmm3. | 826 // passed in registers xmm0 - xmm3. |
| 826 unsigned NumXmmArgs = 0; | 827 unsigned NumXmmArgs = 0; |
| 827 | 828 |
| 828 Context.init(Func->getEntryNode()); | 829 Context.init(Func->getEntryNode()); |
| 829 Context.setInsertPoint(Context.getCur()); | 830 Context.setInsertPoint(Context.getCur()); |
| 830 | 831 |
| 831 for (SizeT I = 0, E = Args.size(); | 832 for (SizeT I = 0, E = Args.size(); |
| 832 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | 833 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { |
| 833 Variable *Arg = Args[I]; | 834 Variable *Arg = Args[I]; |
| 834 Type Ty = Arg->getType(); | 835 Type Ty = Arg->getType(); |
| 835 if (!isVectorType(Ty)) | 836 if (!isVectorType(Ty)) |
| 836 continue; | 837 continue; |
| 837 // Replace Arg in the argument list with the home register. Then | 838 // Replace Arg in the argument list with the home register. Then |
| 838 // generate an instruction in the prolog to copy the home register | 839 // generate an instruction in the prolog to copy the home register |
| 839 // to the assigned location of Arg. | 840 // to the assigned location of Arg. |
| 840 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; | 841 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; |
| 841 ++NumXmmArgs; | 842 ++NumXmmArgs; |
| 842 Variable *RegisterArg = Func->template makeVariable(Ty); | 843 Variable *RegisterArg = Func->template makeVariable(Ty); |
| 843 if (BuildDefs::dump()) | 844 if (BuildDefs::dump()) |
| 844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 845 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 845 RegisterArg->setRegNum(RegNum); | 846 RegisterArg->setRegNum(RegNum); |
| 846 RegisterArg->setIsArg(); | 847 RegisterArg->setIsArg(); |
| 847 Arg->setIsArg(false); | 848 Arg->setIsArg(false); |
| 848 | 849 |
| 849 Args[I] = RegisterArg; | 850 Args[I] = RegisterArg; |
| 850 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 851 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 990 _push(getPhysicalRegister(i)); | 991 _push(getPhysicalRegister(i)); |
| 991 } | 992 } |
| 992 } | 993 } |
| 993 Ctx->statsUpdateRegistersSaved(NumCallee); | 994 Ctx->statsUpdateRegistersSaved(NumCallee); |
| 994 | 995 |
| 995 // Generate "push ebp; mov ebp, esp" | 996 // Generate "push ebp; mov ebp, esp" |
| 996 if (IsEbpBasedFrame) { | 997 if (IsEbpBasedFrame) { |
| 997 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 998 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| 998 .count() == 0); | 999 .count() == 0); |
| 999 PreservedRegsSizeBytes += 4; | 1000 PreservedRegsSizeBytes += 4; |
| 1000 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); | 1001 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 1001 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 1002 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 1002 _push(ebp); | 1003 _push(ebp); |
| 1003 _mov(ebp, esp); | 1004 _mov(ebp, esp); |
| 1004 // Keep ebp live for late-stage liveness analysis | 1005 // Keep ebp live for late-stage liveness analysis |
| 1005 // (e.g. asm-verbose mode). | 1006 // (e.g. asm-verbose mode). |
| 1006 Context.insert(InstFakeUse::create(Func, ebp)); | 1007 Context.insert(InstFakeUse::create(Func, ebp)); |
| 1007 } | 1008 } |
| 1008 | 1009 |
| 1009 // Align the variables area. SpillAreaPaddingBytes is the size of | 1010 // Align the variables area. SpillAreaPaddingBytes is the size of |
| 1010 // the region after the preserved registers and before the spill areas. | 1011 // the region after the preserved registers and before the spill areas. |
| 1011 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 1012 // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| (...skipping 14 matching lines...) Expand all Loading... |
| 1026 if (NeedsStackAlignment) { | 1027 if (NeedsStackAlignment) { |
| 1027 uint32_t StackOffset = | 1028 uint32_t StackOffset = |
| 1028 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 1029 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 1029 uint32_t StackSize = | 1030 uint32_t StackSize = |
| 1030 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 1031 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 1031 SpillAreaSizeBytes = StackSize - StackOffset; | 1032 SpillAreaSizeBytes = StackSize - StackOffset; |
| 1032 } | 1033 } |
| 1033 | 1034 |
| 1034 // Generate "sub esp, SpillAreaSizeBytes" | 1035 // Generate "sub esp, SpillAreaSizeBytes" |
| 1035 if (SpillAreaSizeBytes) | 1036 if (SpillAreaSizeBytes) |
| 1036 _sub(getPhysicalRegister(RegX8632::Reg_esp), | 1037 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 1037 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 1038 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 1038 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1039 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 1039 | 1040 |
| 1040 resetStackAdjustment(); | 1041 resetStackAdjustment(); |
| 1041 | 1042 |
| 1042 // Fill in stack offsets for stack args, and copy args into registers | 1043 // Fill in stack offsets for stack args, and copy args into registers |
| 1043 // for those that were register-allocated. Args are pushed right to | 1044 // for those that were register-allocated. Args are pushed right to |
| 1044 // left, so Arg[0] is closest to the stack/frame pointer. | 1045 // left, so Arg[0] is closest to the stack/frame pointer. |
| 1045 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1046 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 1046 size_t BasicFrameOffset = | 1047 size_t BasicFrameOffset = |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1110 if (RI == E) | 1111 if (RI == E) |
| 1111 return; | 1112 return; |
| 1112 | 1113 |
| 1113 // Convert the reverse_iterator position into its corresponding | 1114 // Convert the reverse_iterator position into its corresponding |
| 1114 // (forward) iterator position. | 1115 // (forward) iterator position. |
| 1115 InstList::iterator InsertPoint = RI.base(); | 1116 InstList::iterator InsertPoint = RI.base(); |
| 1116 --InsertPoint; | 1117 --InsertPoint; |
| 1117 Context.init(Node); | 1118 Context.init(Node); |
| 1118 Context.setInsertPoint(InsertPoint); | 1119 Context.setInsertPoint(InsertPoint); |
| 1119 | 1120 |
| 1120 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 1121 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 1121 if (IsEbpBasedFrame) { | 1122 if (IsEbpBasedFrame) { |
| 1122 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); | 1123 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 1123 // For late-stage liveness analysis (e.g. asm-verbose mode), | 1124 // For late-stage liveness analysis (e.g. asm-verbose mode), |
| 1124 // adding a fake use of esp before the assignment of esp=ebp keeps | 1125 // adding a fake use of esp before the assignment of esp=ebp keeps |
| 1125 // previous esp adjustments from being dead-code eliminated. | 1126 // previous esp adjustments from being dead-code eliminated. |
| 1126 Context.insert(InstFakeUse::create(Func, esp)); | 1127 Context.insert(InstFakeUse::create(Func, esp)); |
| 1127 _mov(esp, ebp); | 1128 _mov(esp, ebp); |
| 1128 _pop(ebp); | 1129 _pop(ebp); |
| 1129 } else { | 1130 } else { |
| 1130 // add esp, SpillAreaSizeBytes | 1131 // add esp, SpillAreaSizeBytes |
| 1131 if (SpillAreaSizeBytes) | 1132 if (SpillAreaSizeBytes) |
| 1132 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); | 1133 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 1133 } | 1134 } |
| 1134 | 1135 |
| 1135 // Add pop instructions for preserved registers. | 1136 // Add pop instructions for preserved registers. |
| 1136 llvm::SmallBitVector CalleeSaves = | 1137 llvm::SmallBitVector CalleeSaves = |
| 1137 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 1138 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 1138 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 1139 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 1139 SizeT j = CalleeSaves.size() - i - 1; | 1140 SizeT j = CalleeSaves.size() - i - 1; |
| 1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) | 1141 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame) |
| 1141 continue; | 1142 continue; |
| 1142 if (CalleeSaves[j] && RegsUsed[j]) { | 1143 if (CalleeSaves[j] && RegsUsed[j]) { |
| 1143 _pop(getPhysicalRegister(j)); | 1144 _pop(getPhysicalRegister(j)); |
| 1144 } | 1145 } |
| 1145 } | 1146 } |
| 1146 | 1147 |
| 1147 if (!Ctx->getFlags().getUseSandboxing()) | 1148 if (!Ctx->getFlags().getUseSandboxing()) |
| 1148 return; | 1149 return; |
| 1149 // Change the original ret instruction into a sandboxed return sequence. | 1150 // Change the original ret instruction into a sandboxed return sequence. |
| 1150 // t:ecx = pop | 1151 // t:ecx = pop |
| 1151 // bundle_lock | 1152 // bundle_lock |
| 1152 // and t, ~31 | 1153 // and t, ~31 |
| 1153 // jmp *t | 1154 // jmp *t |
| 1154 // bundle_unlock | 1155 // bundle_unlock |
| 1155 // FakeUse <original_ret_operand> | 1156 // FakeUse <original_ret_operand> |
| 1156 const SizeT BundleSize = | 1157 const SizeT BundleSize = |
| 1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); | 1158 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 1159 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 1159 _pop(T_ecx); | 1160 _pop(T_ecx); |
| 1160 _bundle_lock(); | 1161 _bundle_lock(); |
| 1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | 1162 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 1162 _jmp(T_ecx); | 1163 _jmp(T_ecx); |
| 1163 _bundle_unlock(); | 1164 _bundle_unlock(); |
| 1164 if (RI->getSrcSize()) { | 1165 if (RI->getSrcSize()) { |
| 1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1166 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1166 Context.insert(InstFakeUse::create(Func, RetValue)); | 1167 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1167 } | 1168 } |
| 1168 RI->setDeleted(); | 1169 RI->setDeleted(); |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1266 return legalize(MemOperand); | 1267 return legalize(MemOperand); |
| 1267 } | 1268 } |
| 1268 llvm_unreachable("Unsupported operand type"); | 1269 llvm_unreachable("Unsupported operand type"); |
| 1269 return nullptr; | 1270 return nullptr; |
| 1270 } | 1271 } |
| 1271 | 1272 |
| 1272 template <class Machine> | 1273 template <class Machine> |
| 1273 llvm::SmallBitVector | 1274 llvm::SmallBitVector |
| 1274 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 1275 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
| 1275 RegSetMask Exclude) const { | 1276 RegSetMask Exclude) const { |
| 1276 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); | 1277 llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM); |
| 1277 | 1278 |
| 1278 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 1279 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 1279 frameptr, isI8, isInt, isFP) \ | 1280 frameptr, isI8, isInt, isFP) \ |
| 1280 if (scratch && (Include & RegSet_CallerSave)) \ | 1281 if (scratch && (Include & RegSet_CallerSave)) \ |
| 1281 Registers[RegX8632::val] = true; \ | 1282 Registers[Traits::RegisterSet::val] = true; \ |
| 1282 if (preserved && (Include & RegSet_CalleeSave)) \ | 1283 if (preserved && (Include & RegSet_CalleeSave)) \ |
| 1283 Registers[RegX8632::val] = true; \ | 1284 Registers[Traits::RegisterSet::val] = true; \ |
| 1284 if (stackptr && (Include & RegSet_StackPointer)) \ | 1285 if (stackptr && (Include & RegSet_StackPointer)) \ |
| 1285 Registers[RegX8632::val] = true; \ | 1286 Registers[Traits::RegisterSet::val] = true; \ |
| 1286 if (frameptr && (Include & RegSet_FramePointer)) \ | 1287 if (frameptr && (Include & RegSet_FramePointer)) \ |
| 1287 Registers[RegX8632::val] = true; \ | 1288 Registers[Traits::RegisterSet::val] = true; \ |
| 1288 if (scratch && (Exclude & RegSet_CallerSave)) \ | 1289 if (scratch && (Exclude & RegSet_CallerSave)) \ |
| 1289 Registers[RegX8632::val] = false; \ | 1290 Registers[Traits::RegisterSet::val] = false; \ |
| 1290 if (preserved && (Exclude & RegSet_CalleeSave)) \ | 1291 if (preserved && (Exclude & RegSet_CalleeSave)) \ |
| 1291 Registers[RegX8632::val] = false; \ | 1292 Registers[Traits::RegisterSet::val] = false; \ |
| 1292 if (stackptr && (Exclude & RegSet_StackPointer)) \ | 1293 if (stackptr && (Exclude & RegSet_StackPointer)) \ |
| 1293 Registers[RegX8632::val] = false; \ | 1294 Registers[Traits::RegisterSet::val] = false; \ |
| 1294 if (frameptr && (Exclude & RegSet_FramePointer)) \ | 1295 if (frameptr && (Exclude & RegSet_FramePointer)) \ |
| 1295 Registers[RegX8632::val] = false; | 1296 Registers[Traits::RegisterSet::val] = false; |
| 1296 | 1297 |
| 1297 REGX8632_TABLE | 1298 REGX8632_TABLE |
| 1298 | 1299 |
| 1299 #undef X | 1300 #undef X |
| 1300 | 1301 |
| 1301 return Registers; | 1302 return Registers; |
| 1302 } | 1303 } |
| 1303 | 1304 |
| 1304 template <class Machine> | 1305 template <class Machine> |
| 1305 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { | 1306 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| 1306 IsEbpBasedFrame = true; | 1307 IsEbpBasedFrame = true; |
| 1307 // Conservatively require the stack to be aligned. Some stack | 1308 // Conservatively require the stack to be aligned. Some stack |
| 1308 // adjustment operations implemented below assume that the stack is | 1309 // adjustment operations implemented below assume that the stack is |
| 1309 // aligned before the alloca. All the alloca code ensures that the | 1310 // aligned before the alloca. All the alloca code ensures that the |
| 1310 // stack alignment is preserved after the alloca. The stack alignment | 1311 // stack alignment is preserved after the alloca. The stack alignment |
| 1311 // restriction can be relaxed in some cases. | 1312 // restriction can be relaxed in some cases. |
| 1312 NeedsStackAlignment = true; | 1313 NeedsStackAlignment = true; |
| 1313 | 1314 |
| 1314 // TODO(stichnot): minimize the number of adjustments of esp, etc. | 1315 // TODO(stichnot): minimize the number of adjustments of esp, etc. |
| 1315 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 1316 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 1316 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1317 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 1317 Variable *Dest = Inst->getDest(); | 1318 Variable *Dest = Inst->getDest(); |
| 1318 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1319 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
| 1319 // For default align=0, set it to the real value 1, to avoid any | 1320 // For default align=0, set it to the real value 1, to avoid any |
| 1320 // bit-manipulation problems below. | 1321 // bit-manipulation problems below. |
| 1321 AlignmentParam = std::max(AlignmentParam, 1u); | 1322 AlignmentParam = std::max(AlignmentParam, 1u); |
| 1322 | 1323 |
| 1323 // LLVM enforces power of 2 alignment. | 1324 // LLVM enforces power of 2 alignment. |
| 1324 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1325 assert(llvm::isPowerOf2_32(AlignmentParam)); |
| 1325 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); | 1326 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); |
| (...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1544 case InstArithmetic::Sub: | 1545 case InstArithmetic::Sub: |
| 1545 _mov(T_Lo, Src0Lo); | 1546 _mov(T_Lo, Src0Lo); |
| 1546 _sub(T_Lo, Src1Lo); | 1547 _sub(T_Lo, Src1Lo); |
| 1547 _mov(DestLo, T_Lo); | 1548 _mov(DestLo, T_Lo); |
| 1548 _mov(T_Hi, Src0Hi); | 1549 _mov(T_Hi, Src0Hi); |
| 1549 _sbb(T_Hi, Src1Hi); | 1550 _sbb(T_Hi, Src1Hi); |
| 1550 _mov(DestHi, T_Hi); | 1551 _mov(DestHi, T_Hi); |
| 1551 break; | 1552 break; |
| 1552 case InstArithmetic::Mul: { | 1553 case InstArithmetic::Mul: { |
| 1553 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1554 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1554 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax); | 1555 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 1555 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx); | 1556 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 1556 // gcc does the following: | 1557 // gcc does the following: |
| 1557 // a=b*c ==> | 1558 // a=b*c ==> |
| 1558 // t1 = b.hi; t1 *=(imul) c.lo | 1559 // t1 = b.hi; t1 *=(imul) c.lo |
| 1559 // t2 = c.hi; t2 *=(imul) b.lo | 1560 // t2 = c.hi; t2 *=(imul) b.lo |
| 1560 // t3:eax = b.lo | 1561 // t3:eax = b.lo |
| 1561 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | 1562 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo |
| 1562 // a.lo = t4.lo | 1563 // a.lo = t4.lo |
| 1563 // t4.hi += t1 | 1564 // t4.hi += t1 |
| 1564 // t4.hi += t2 | 1565 // t4.hi += t2 |
| 1565 // a.hi = t4.hi | 1566 // a.hi = t4.hi |
| 1566 // The mul instruction cannot take an immediate operand. | 1567 // The mul instruction cannot take an immediate operand. |
| 1567 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | 1568 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); |
| 1568 _mov(T_1, Src0Hi); | 1569 _mov(T_1, Src0Hi); |
| 1569 _imul(T_1, Src1Lo); | 1570 _imul(T_1, Src1Lo); |
| 1570 _mov(T_2, Src1Hi); | 1571 _mov(T_2, Src1Hi); |
| 1571 _imul(T_2, Src0Lo); | 1572 _imul(T_2, Src0Lo); |
| 1572 _mov(T_3, Src0Lo, RegX8632::Reg_eax); | 1573 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); |
| 1573 _mul(T_4Lo, T_3, Src1Lo); | 1574 _mul(T_4Lo, T_3, Src1Lo); |
| 1574 // The mul instruction produces two dest variables, edx:eax. We | 1575 // The mul instruction produces two dest variables, edx:eax. We |
| 1575 // create a fake definition of edx to account for this. | 1576 // create a fake definition of edx to account for this. |
| 1576 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1577 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); |
| 1577 _mov(DestLo, T_4Lo); | 1578 _mov(DestLo, T_4Lo); |
| 1578 _add(T_4Hi, T_1); | 1579 _add(T_4Hi, T_1); |
| 1579 _add(T_4Hi, T_2); | 1580 _add(T_4Hi, T_2); |
| 1580 _mov(DestHi, T_4Hi); | 1581 _mov(DestHi, T_4Hi); |
| 1581 } break; | 1582 } break; |
| 1582 case InstArithmetic::Shl: { | 1583 case InstArithmetic::Shl: { |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1593 // use(t3) | 1594 // use(t3) |
| 1594 // t3 = t2 | 1595 // t3 = t2 |
| 1595 // t2 = 0 | 1596 // t2 = 0 |
| 1596 // L1: | 1597 // L1: |
| 1597 // a.lo = t2 | 1598 // a.lo = t2 |
| 1598 // a.hi = t3 | 1599 // a.hi = t3 |
| 1599 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1600 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1600 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1601 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1601 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1602 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1602 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1603 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 1603 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1604 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| 1604 _mov(T_2, Src0Lo); | 1605 _mov(T_2, Src0Lo); |
| 1605 _mov(T_3, Src0Hi); | 1606 _mov(T_3, Src0Hi); |
| 1606 _shld(T_3, T_2, T_1); | 1607 _shld(T_3, T_2, T_1); |
| 1607 _shl(T_2, T_1); | 1608 _shl(T_2, T_1); |
| 1608 _test(T_1, BitTest); | 1609 _test(T_1, BitTest); |
| 1609 _br(CondX86::Br_e, Label); | 1610 _br(Traits::Cond::Br_e, Label); |
| 1610 // T_2 and T_3 are being assigned again because of the | 1611 // T_2 and T_3 are being assigned again because of the |
| 1611 // intra-block control flow, so we need the _mov_nonkillable | 1612 // intra-block control flow, so we need the _mov_nonkillable |
| 1612 // variant to avoid liveness problems. | 1613 // variant to avoid liveness problems. |
| 1613 _mov_nonkillable(T_3, T_2); | 1614 _mov_nonkillable(T_3, T_2); |
| 1614 _mov_nonkillable(T_2, Zero); | 1615 _mov_nonkillable(T_2, Zero); |
| 1615 Context.insert(Label); | 1616 Context.insert(Label); |
| 1616 _mov(DestLo, T_2); | 1617 _mov(DestLo, T_2); |
| 1617 _mov(DestHi, T_3); | 1618 _mov(DestHi, T_3); |
| 1618 } break; | 1619 } break; |
| 1619 case InstArithmetic::Lshr: { | 1620 case InstArithmetic::Lshr: { |
| 1620 // a=b>>c (unsigned) ==> | 1621 // a=b>>c (unsigned) ==> |
| 1621 // t1:ecx = c.lo & 0xff | 1622 // t1:ecx = c.lo & 0xff |
| 1622 // t2 = b.lo | 1623 // t2 = b.lo |
| 1623 // t3 = b.hi | 1624 // t3 = b.hi |
| 1624 // t2 = shrd t2, t3, t1 | 1625 // t2 = shrd t2, t3, t1 |
| 1625 // t3 = shr t3, t1 | 1626 // t3 = shr t3, t1 |
| 1626 // test t1, 0x20 | 1627 // test t1, 0x20 |
| 1627 // je L1 | 1628 // je L1 |
| 1628 // use(t2) | 1629 // use(t2) |
| 1629 // t2 = t3 | 1630 // t2 = t3 |
| 1630 // t3 = 0 | 1631 // t3 = 0 |
| 1631 // L1: | 1632 // L1: |
| 1632 // a.lo = t2 | 1633 // a.lo = t2 |
| 1633 // a.hi = t3 | 1634 // a.hi = t3 |
| 1634 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1635 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1635 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1636 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1636 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1637 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1637 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1638 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 1638 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1639 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| 1639 _mov(T_2, Src0Lo); | 1640 _mov(T_2, Src0Lo); |
| 1640 _mov(T_3, Src0Hi); | 1641 _mov(T_3, Src0Hi); |
| 1641 _shrd(T_2, T_3, T_1); | 1642 _shrd(T_2, T_3, T_1); |
| 1642 _shr(T_3, T_1); | 1643 _shr(T_3, T_1); |
| 1643 _test(T_1, BitTest); | 1644 _test(T_1, BitTest); |
| 1644 _br(CondX86::Br_e, Label); | 1645 _br(Traits::Cond::Br_e, Label); |
| 1645 // T_2 and T_3 are being assigned again because of the | 1646 // T_2 and T_3 are being assigned again because of the |
| 1646 // intra-block control flow, so we need the _mov_nonkillable | 1647 // intra-block control flow, so we need the _mov_nonkillable |
| 1647 // variant to avoid liveness problems. | 1648 // variant to avoid liveness problems. |
| 1648 _mov_nonkillable(T_2, T_3); | 1649 _mov_nonkillable(T_2, T_3); |
| 1649 _mov_nonkillable(T_3, Zero); | 1650 _mov_nonkillable(T_3, Zero); |
| 1650 Context.insert(Label); | 1651 Context.insert(Label); |
| 1651 _mov(DestLo, T_2); | 1652 _mov(DestLo, T_2); |
| 1652 _mov(DestHi, T_3); | 1653 _mov(DestHi, T_3); |
| 1653 } break; | 1654 } break; |
| 1654 case InstArithmetic::Ashr: { | 1655 case InstArithmetic::Ashr: { |
| 1655 // a=b>>c (signed) ==> | 1656 // a=b>>c (signed) ==> |
| 1656 // t1:ecx = c.lo & 0xff | 1657 // t1:ecx = c.lo & 0xff |
| 1657 // t2 = b.lo | 1658 // t2 = b.lo |
| 1658 // t3 = b.hi | 1659 // t3 = b.hi |
| 1659 // t2 = shrd t2, t3, t1 | 1660 // t2 = shrd t2, t3, t1 |
| 1660 // t3 = sar t3, t1 | 1661 // t3 = sar t3, t1 |
| 1661 // test t1, 0x20 | 1662 // test t1, 0x20 |
| 1662 // je L1 | 1663 // je L1 |
| 1663 // use(t2) | 1664 // use(t2) |
| 1664 // t2 = t3 | 1665 // t2 = t3 |
| 1665 // t3 = sar t3, 0x1f | 1666 // t3 = sar t3, 0x1f |
| 1666 // L1: | 1667 // L1: |
| 1667 // a.lo = t2 | 1668 // a.lo = t2 |
| 1668 // a.hi = t3 | 1669 // a.hi = t3 |
| 1669 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | 1670 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| 1670 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1671 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1671 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1672 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| 1672 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1673 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 1673 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1674 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| 1674 _mov(T_2, Src0Lo); | 1675 _mov(T_2, Src0Lo); |
| 1675 _mov(T_3, Src0Hi); | 1676 _mov(T_3, Src0Hi); |
| 1676 _shrd(T_2, T_3, T_1); | 1677 _shrd(T_2, T_3, T_1); |
| 1677 _sar(T_3, T_1); | 1678 _sar(T_3, T_1); |
| 1678 _test(T_1, BitTest); | 1679 _test(T_1, BitTest); |
| 1679 _br(CondX86::Br_e, Label); | 1680 _br(Traits::Cond::Br_e, Label); |
| 1680 // T_2 and T_3 are being assigned again because of the | 1681 // T_2 and T_3 are being assigned again because of the |
| 1681 // intra-block control flow, so T_2 needs the _mov_nonkillable | 1682 // intra-block control flow, so T_2 needs the _mov_nonkillable |
| 1682 // variant to avoid liveness problems. T_3 doesn't need special | 1683 // variant to avoid liveness problems. T_3 doesn't need special |
| 1683 // treatment because it is reassigned via _sar instead of _mov. | 1684 // treatment because it is reassigned via _sar instead of _mov. |
| 1684 _mov_nonkillable(T_2, T_3); | 1685 _mov_nonkillable(T_2, T_3); |
| 1685 _sar(T_3, SignExtend); | 1686 _sar(T_3, SignExtend); |
| 1686 Context.insert(Label); | 1687 Context.insert(Label); |
| 1687 _mov(DestLo, T_2); | 1688 _mov(DestLo, T_2); |
| 1688 _mov(DestHi, T_3); | 1689 _mov(DestHi, T_3); |
| 1689 } break; | 1690 } break; |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1740 case InstArithmetic::Sub: { | 1741 case InstArithmetic::Sub: { |
| 1741 Variable *T = makeReg(Dest->getType()); | 1742 Variable *T = makeReg(Dest->getType()); |
| 1742 _movp(T, Src0); | 1743 _movp(T, Src0); |
| 1743 _psub(T, Src1); | 1744 _psub(T, Src1); |
| 1744 _movp(Dest, T); | 1745 _movp(Dest, T); |
| 1745 } break; | 1746 } break; |
| 1746 case InstArithmetic::Mul: { | 1747 case InstArithmetic::Mul: { |
| 1747 bool TypesAreValidForPmull = | 1748 bool TypesAreValidForPmull = |
| 1748 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1749 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
| 1749 bool InstructionSetIsValidForPmull = | 1750 bool InstructionSetIsValidForPmull = |
| 1750 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1; | 1751 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; |
| 1751 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1752 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1752 Variable *T = makeReg(Dest->getType()); | 1753 Variable *T = makeReg(Dest->getType()); |
| 1753 _movp(T, Src0); | 1754 _movp(T, Src0); |
| 1754 _pmull(T, Src1); | 1755 _pmull(T, Src1); |
| 1755 _movp(Dest, T); | 1756 _movp(Dest, T); |
| 1756 } else if (Dest->getType() == IceType_v4i32) { | 1757 } else if (Dest->getType() == IceType_v4i32) { |
| 1757 // Lowering sequence: | 1758 // Lowering sequence: |
| 1758 // Note: The mask arguments have index 0 on the left. | 1759 // Note: The mask arguments have index 0 on the left. |
| 1759 // | 1760 // |
| 1760 // movups T1, Src0 | 1761 // movups T1, Src0 |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1867 _mov(Dest, T); | 1868 _mov(Dest, T); |
| 1868 break; | 1869 break; |
| 1869 case InstArithmetic::Mul: | 1870 case InstArithmetic::Mul: |
| 1870 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1871 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1871 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1872 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
| 1872 return; | 1873 return; |
| 1873 } | 1874 } |
| 1874 // The 8-bit version of imul only allows the form "imul r/m8" | 1875 // The 8-bit version of imul only allows the form "imul r/m8" |
| 1875 // where T must be in eax. | 1876 // where T must be in eax. |
| 1876 if (isByteSizedArithType(Dest->getType())) { | 1877 if (isByteSizedArithType(Dest->getType())) { |
| 1877 _mov(T, Src0, RegX8632::Reg_eax); | 1878 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1878 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1879 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1879 } else { | 1880 } else { |
| 1880 _mov(T, Src0); | 1881 _mov(T, Src0); |
| 1881 } | 1882 } |
| 1882 _imul(T, Src1); | 1883 _imul(T, Src1); |
| 1883 _mov(Dest, T); | 1884 _mov(Dest, T); |
| 1884 break; | 1885 break; |
| 1885 case InstArithmetic::Shl: | 1886 case InstArithmetic::Shl: |
| 1886 _mov(T, Src0); | 1887 _mov(T, Src0); |
| 1887 if (!llvm::isa<Constant>(Src1)) | 1888 if (!llvm::isa<Constant>(Src1)) |
| 1888 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1889 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx); |
| 1889 _shl(T, Src1); | 1890 _shl(T, Src1); |
| 1890 _mov(Dest, T); | 1891 _mov(Dest, T); |
| 1891 break; | 1892 break; |
| 1892 case InstArithmetic::Lshr: | 1893 case InstArithmetic::Lshr: |
| 1893 _mov(T, Src0); | 1894 _mov(T, Src0); |
| 1894 if (!llvm::isa<Constant>(Src1)) | 1895 if (!llvm::isa<Constant>(Src1)) |
| 1895 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1896 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx); |
| 1896 _shr(T, Src1); | 1897 _shr(T, Src1); |
| 1897 _mov(Dest, T); | 1898 _mov(Dest, T); |
| 1898 break; | 1899 break; |
| 1899 case InstArithmetic::Ashr: | 1900 case InstArithmetic::Ashr: |
| 1900 _mov(T, Src0); | 1901 _mov(T, Src0); |
| 1901 if (!llvm::isa<Constant>(Src1)) | 1902 if (!llvm::isa<Constant>(Src1)) |
| 1902 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1903 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx); |
| 1903 _sar(T, Src1); | 1904 _sar(T, Src1); |
| 1904 _mov(Dest, T); | 1905 _mov(Dest, T); |
| 1905 break; | 1906 break; |
| 1906 case InstArithmetic::Udiv: | 1907 case InstArithmetic::Udiv: |
| 1907 // div and idiv are the few arithmetic operators that do not allow | 1908 // div and idiv are the few arithmetic operators that do not allow |
| 1908 // immediates as the operand. | 1909 // immediates as the operand. |
| 1909 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1910 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1910 if (isByteSizedArithType(Dest->getType())) { | 1911 if (isByteSizedArithType(Dest->getType())) { |
| 1911 Variable *T_ah = nullptr; | 1912 Variable *T_ah = nullptr; |
| 1912 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1913 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
| 1913 _mov(T, Src0, RegX8632::Reg_eax); | 1914 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1914 _mov(T_ah, Zero, RegX8632::Reg_ah); | 1915 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); |
| 1915 _div(T, Src1, T_ah); | 1916 _div(T, Src1, T_ah); |
| 1916 _mov(Dest, T); | 1917 _mov(Dest, T); |
| 1917 } else { | 1918 } else { |
| 1918 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1919 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1919 _mov(T, Src0, RegX8632::Reg_eax); | 1920 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1920 _mov(T_edx, Zero, RegX8632::Reg_edx); | 1921 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
| 1921 _div(T, Src1, T_edx); | 1922 _div(T, Src1, T_edx); |
| 1922 _mov(Dest, T); | 1923 _mov(Dest, T); |
| 1923 } | 1924 } |
| 1924 break; | 1925 break; |
| 1925 case InstArithmetic::Sdiv: | 1926 case InstArithmetic::Sdiv: |
| 1926 // TODO(stichnot): Enable this after doing better performance | 1927 // TODO(stichnot): Enable this after doing better performance |
| 1927 // and cross testing. | 1928 // and cross testing. |
| 1928 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1929 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1929 // Optimize division by constant power of 2, but not for Om1 | 1930 // Optimize division by constant power of 2, but not for Om1 |
| 1930 // or O0, just to keep things simple there. | 1931 // or O0, just to keep things simple there. |
| (...skipping 22 matching lines...) Expand all Loading... |
| 1953 _add(T, Src0); | 1954 _add(T, Src0); |
| 1954 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1955 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| 1955 } | 1956 } |
| 1956 _mov(Dest, T); | 1957 _mov(Dest, T); |
| 1957 return; | 1958 return; |
| 1958 } | 1959 } |
| 1959 } | 1960 } |
| 1960 } | 1961 } |
| 1961 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1962 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1962 if (isByteSizedArithType(Dest->getType())) { | 1963 if (isByteSizedArithType(Dest->getType())) { |
| 1963 _mov(T, Src0, RegX8632::Reg_eax); | 1964 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1964 _cbwdq(T, T); | 1965 _cbwdq(T, T); |
| 1965 _idiv(T, Src1, T); | 1966 _idiv(T, Src1, T); |
| 1966 _mov(Dest, T); | 1967 _mov(Dest, T); |
| 1967 } else { | 1968 } else { |
| 1968 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 1969 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 1969 _mov(T, Src0, RegX8632::Reg_eax); | 1970 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1970 _cbwdq(T_edx, T); | 1971 _cbwdq(T_edx, T); |
| 1971 _idiv(T, Src1, T_edx); | 1972 _idiv(T, Src1, T_edx); |
| 1972 _mov(Dest, T); | 1973 _mov(Dest, T); |
| 1973 } | 1974 } |
| 1974 break; | 1975 break; |
| 1975 case InstArithmetic::Urem: | 1976 case InstArithmetic::Urem: |
| 1976 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1977 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1977 if (isByteSizedArithType(Dest->getType())) { | 1978 if (isByteSizedArithType(Dest->getType())) { |
| 1978 Variable *T_ah = nullptr; | 1979 Variable *T_ah = nullptr; |
| 1979 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1980 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
| 1980 _mov(T, Src0, RegX8632::Reg_eax); | 1981 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1981 _mov(T_ah, Zero, RegX8632::Reg_ah); | 1982 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah); |
| 1982 _div(T_ah, Src1, T); | 1983 _div(T_ah, Src1, T); |
| 1983 _mov(Dest, T_ah); | 1984 _mov(Dest, T_ah); |
| 1984 } else { | 1985 } else { |
| 1985 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1986 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1986 _mov(T_edx, Zero, RegX8632::Reg_edx); | 1987 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
| 1987 _mov(T, Src0, RegX8632::Reg_eax); | 1988 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1988 _div(T_edx, Src1, T); | 1989 _div(T_edx, Src1, T); |
| 1989 _mov(Dest, T_edx); | 1990 _mov(Dest, T_edx); |
| 1990 } | 1991 } |
| 1991 break; | 1992 break; |
| 1992 case InstArithmetic::Srem: | 1993 case InstArithmetic::Srem: |
| 1993 // TODO(stichnot): Enable this after doing better performance | 1994 // TODO(stichnot): Enable this after doing better performance |
| 1994 // and cross testing. | 1995 // and cross testing. |
| 1995 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1996 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1996 // Optimize mod by constant power of 2, but not for Om1 or O0, | 1997 // Optimize mod by constant power of 2, but not for Om1 or O0, |
| 1997 // just to keep things simple there. | 1998 // just to keep things simple there. |
| (...skipping 27 matching lines...) Expand all Loading... |
| 2025 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 2026 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 2026 _sub(T, Src0); | 2027 _sub(T, Src0); |
| 2027 _neg(T); | 2028 _neg(T); |
| 2028 _mov(Dest, T); | 2029 _mov(Dest, T); |
| 2029 return; | 2030 return; |
| 2030 } | 2031 } |
| 2031 } | 2032 } |
| 2032 } | 2033 } |
| 2033 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 2034 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2034 if (isByteSizedArithType(Dest->getType())) { | 2035 if (isByteSizedArithType(Dest->getType())) { |
| 2035 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); | 2036 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah); |
| 2036 _mov(T, Src0, RegX8632::Reg_eax); | 2037 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 2037 _cbwdq(T, T); | 2038 _cbwdq(T, T); |
| 2038 Context.insert(InstFakeDef::create(Func, T_ah)); | 2039 Context.insert(InstFakeDef::create(Func, T_ah)); |
| 2039 _idiv(T_ah, Src1, T); | 2040 _idiv(T_ah, Src1, T); |
| 2040 _mov(Dest, T_ah); | 2041 _mov(Dest, T_ah); |
| 2041 } else { | 2042 } else { |
| 2042 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 2043 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 2043 _mov(T, Src0, RegX8632::Reg_eax); | 2044 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 2044 _cbwdq(T_edx, T); | 2045 _cbwdq(T_edx, T); |
| 2045 _idiv(T_edx, Src1, T); | 2046 _idiv(T_edx, Src1, T); |
| 2046 _mov(Dest, T_edx); | 2047 _mov(Dest, T_edx); |
| 2047 } | 2048 } |
| 2048 break; | 2049 break; |
| 2049 case InstArithmetic::Fadd: | 2050 case InstArithmetic::Fadd: |
| 2050 _mov(T, Src0); | 2051 _mov(T, Src0); |
| 2051 _addss(T, Src1); | 2052 _addss(T, Src1); |
| 2052 _mov(Dest, T); | 2053 _mov(Dest, T); |
| 2053 break; | 2054 break; |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2149 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), | 2150 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
| 2150 Inst->getTargetFalse()); | 2151 Inst->getTargetFalse()); |
| 2151 return; | 2152 return; |
| 2152 } | 2153 } |
| 2153 } | 2154 } |
| 2154 } | 2155 } |
| 2155 | 2156 |
| 2156 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2157 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 2157 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2158 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2158 _cmp(Src0, Zero); | 2159 _cmp(Src0, Zero); |
| 2159 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 2160 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 2160 } | 2161 } |
| 2161 | 2162 |
| 2162 template <class Machine> | 2163 template <class Machine> |
| 2163 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { | 2164 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { |
| 2164 // x86-32 calling convention: | 2165 // x86-32 calling convention: |
| 2165 // | 2166 // |
| 2166 // * At the point before the call, the stack must be aligned to 16 | 2167 // * At the point before the call, the stack must be aligned to 16 |
| 2167 // bytes. | 2168 // bytes. |
| 2168 // | 2169 // |
| 2169 // * The first four arguments of vector type, regardless of their | 2170 // * The first four arguments of vector type, regardless of their |
| (...skipping 26 matching lines...) Expand all Loading... |
| 2196 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 2197 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 2197 assert(typeWidthInBytes(Ty) >= 4); | 2198 assert(typeWidthInBytes(Ty) >= 4); |
| 2198 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | 2199 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 2199 XmmArgs.push_back(Arg); | 2200 XmmArgs.push_back(Arg); |
| 2200 } else { | 2201 } else { |
| 2201 StackArgs.push_back(Arg); | 2202 StackArgs.push_back(Arg); |
| 2202 if (isVectorType(Arg->getType())) { | 2203 if (isVectorType(Arg->getType())) { |
| 2203 ParameterAreaSizeBytes = | 2204 ParameterAreaSizeBytes = |
| 2204 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 2205 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 2205 } | 2206 } |
| 2206 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 2207 Variable *esp = |
| 2208 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 2207 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 2208 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | 2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
| 2209 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 2210 } | 2212 } |
| 2211 } | 2213 } |
| 2212 | 2214 |
| 2213 // Adjust the parameter area so that the stack is aligned. It is | 2215 // Adjust the parameter area so that the stack is aligned. It is |
| 2214 // assumed that the stack is already aligned at the start of the | 2216 // assumed that the stack is already aligned at the start of the |
| 2215 // calling sequence. | 2217 // calling sequence. |
| 2216 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 2234 // Copy arguments to be passed in registers to the appropriate | 2236 // Copy arguments to be passed in registers to the appropriate |
| 2235 // registers. | 2237 // registers. |
| 2236 // TODO: Investigate the impact of lowering arguments passed in | 2238 // TODO: Investigate the impact of lowering arguments passed in |
| 2237 // registers after lowering stack arguments as opposed to the other | 2239 // registers after lowering stack arguments as opposed to the other |
| 2238 // way around. Lowering register arguments after stack arguments may | 2240 // way around. Lowering register arguments after stack arguments may |
| 2239 // reduce register pressure. On the other hand, lowering register | 2241 // reduce register pressure. On the other hand, lowering register |
| 2240 // arguments first (before stack arguments) may result in more compact | 2242 // arguments first (before stack arguments) may result in more compact |
| 2241 // code, as the memory operand displacements may end up being smaller | 2243 // code, as the memory operand displacements may end up being smaller |
| 2242 // before any stack adjustment is done. | 2244 // before any stack adjustment is done. |
| 2243 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 2245 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 2244 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i); | 2246 Variable *Reg = |
| 2247 legalizeToVar(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
| 2245 // Generate a FakeUse of register arguments so that they do not get | 2248 // Generate a FakeUse of register arguments so that they do not get |
| 2246 // dead code eliminated as a result of the FakeKill of scratch | 2249 // dead code eliminated as a result of the FakeKill of scratch |
| 2247 // registers after the call. | 2250 // registers after the call. |
| 2248 Context.insert(InstFakeUse::create(Func, Reg)); | 2251 Context.insert(InstFakeUse::create(Func, Reg)); |
| 2249 } | 2252 } |
| 2250 // Generate the call instruction. Assign its result to a temporary | 2253 // Generate the call instruction. Assign its result to a temporary |
| 2251 // with high register allocation weight. | 2254 // with high register allocation weight. |
| 2252 Variable *Dest = Instr->getDest(); | 2255 Variable *Dest = Instr->getDest(); |
| 2253 // ReturnReg doubles as ReturnRegLo as necessary. | 2256 // ReturnReg doubles as ReturnRegLo as necessary. |
| 2254 Variable *ReturnReg = nullptr; | 2257 Variable *ReturnReg = nullptr; |
| 2255 Variable *ReturnRegHi = nullptr; | 2258 Variable *ReturnRegHi = nullptr; |
| 2256 if (Dest) { | 2259 if (Dest) { |
| 2257 switch (Dest->getType()) { | 2260 switch (Dest->getType()) { |
| 2258 case IceType_NUM: | 2261 case IceType_NUM: |
| 2259 llvm_unreachable("Invalid Call dest type"); | 2262 llvm_unreachable("Invalid Call dest type"); |
| 2260 break; | 2263 break; |
| 2261 case IceType_void: | 2264 case IceType_void: |
| 2262 break; | 2265 break; |
| 2263 case IceType_i1: | 2266 case IceType_i1: |
| 2264 case IceType_i8: | 2267 case IceType_i8: |
| 2265 case IceType_i16: | 2268 case IceType_i16: |
| 2266 case IceType_i32: | 2269 case IceType_i32: |
| 2267 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax); | 2270 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| 2268 break; | 2271 break; |
| 2269 case IceType_i64: | 2272 case IceType_i64: |
| 2270 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax); | 2273 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 2271 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx); | 2274 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 2272 break; | 2275 break; |
| 2273 case IceType_f32: | 2276 case IceType_f32: |
| 2274 case IceType_f64: | 2277 case IceType_f64: |
| 2275 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | 2278 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with |
| 2276 // the fstp instruction. | 2279 // the fstp instruction. |
| 2277 break; | 2280 break; |
| 2278 case IceType_v4i1: | 2281 case IceType_v4i1: |
| 2279 case IceType_v8i1: | 2282 case IceType_v8i1: |
| 2280 case IceType_v16i1: | 2283 case IceType_v16i1: |
| 2281 case IceType_v16i8: | 2284 case IceType_v16i8: |
| 2282 case IceType_v8i16: | 2285 case IceType_v8i16: |
| 2283 case IceType_v4i32: | 2286 case IceType_v4i32: |
| 2284 case IceType_v4f32: | 2287 case IceType_v4f32: |
| 2285 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0); | 2288 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| 2286 break; | 2289 break; |
| 2287 } | 2290 } |
| 2288 } | 2291 } |
| 2289 Operand *CallTarget = legalize(Instr->getCallTarget()); | 2292 Operand *CallTarget = legalize(Instr->getCallTarget()); |
| 2290 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 2293 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 2291 if (NeedSandboxing) { | 2294 if (NeedSandboxing) { |
| 2292 if (llvm::isa<Constant>(CallTarget)) { | 2295 if (llvm::isa<Constant>(CallTarget)) { |
| 2293 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2296 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2294 } else { | 2297 } else { |
| 2295 Variable *CallTargetVar = nullptr; | 2298 Variable *CallTargetVar = nullptr; |
| 2296 _mov(CallTargetVar, CallTarget); | 2299 _mov(CallTargetVar, CallTarget); |
| 2297 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | 2300 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 2298 const SizeT BundleSize = | 2301 const SizeT BundleSize = |
| 2299 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); | 2302 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| 2300 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | 2303 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 2301 CallTarget = CallTargetVar; | 2304 CallTarget = CallTargetVar; |
| 2302 } | 2305 } |
| 2303 } | 2306 } |
| 2304 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 2307 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
| 2305 Context.insert(NewCall); | 2308 Context.insert(NewCall); |
| 2306 if (NeedSandboxing) | 2309 if (NeedSandboxing) |
| 2307 _bundle_unlock(); | 2310 _bundle_unlock(); |
| 2308 if (ReturnRegHi) | 2311 if (ReturnRegHi) |
| 2309 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 2312 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 2310 | 2313 |
| 2311 // Add the appropriate offset to esp. The call instruction takes care | 2314 // Add the appropriate offset to esp. The call instruction takes care |
| 2312 // of resetting the stack offset during emission. | 2315 // of resetting the stack offset during emission. |
| 2313 if (ParameterAreaSizeBytes) { | 2316 if (ParameterAreaSizeBytes) { |
| 2314 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 2317 Variable *esp = |
| 2318 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 2315 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | 2319 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| 2316 } | 2320 } |
| 2317 | 2321 |
| 2318 // Insert a register-kill pseudo instruction. | 2322 // Insert a register-kill pseudo instruction. |
| 2319 Context.insert(InstFakeKill::create(Func, NewCall)); | 2323 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 2320 | 2324 |
| 2321 // Generate a FakeUse to keep the call live if necessary. | 2325 // Generate a FakeUse to keep the call live if necessary. |
| 2322 if (Instr->hasSideEffects() && ReturnReg) { | 2326 if (Instr->hasSideEffects() && ReturnReg) { |
| 2323 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 2327 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 2324 Context.insert(FakeUse); | 2328 Context.insert(FakeUse); |
| (...skipping 518 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2843 assert(ElementIndex); | 2847 assert(ElementIndex); |
| 2844 | 2848 |
| 2845 unsigned Index = ElementIndex->getValue(); | 2849 unsigned Index = ElementIndex->getValue(); |
| 2846 Type Ty = SourceVectNotLegalized->getType(); | 2850 Type Ty = SourceVectNotLegalized->getType(); |
| 2847 Type ElementTy = typeElementType(Ty); | 2851 Type ElementTy = typeElementType(Ty); |
| 2848 Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 2852 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| 2849 Variable *ExtractedElementR = makeReg(InVectorElementTy); | 2853 Variable *ExtractedElementR = makeReg(InVectorElementTy); |
| 2850 | 2854 |
| 2851 // TODO(wala): Determine the best lowering sequences for each type. | 2855 // TODO(wala): Determine the best lowering sequences for each type. |
| 2852 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2856 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 2853 InstructionSet >= Machine::SSE4_1; | 2857 InstructionSet >= Traits::SSE4_1; |
| 2854 if (CanUsePextr && Ty != IceType_v4f32) { | 2858 if (CanUsePextr && Ty != IceType_v4f32) { |
| 2855 // Use pextrb, pextrw, or pextrd. | 2859 // Use pextrb, pextrw, or pextrd. |
| 2856 Constant *Mask = Ctx->getConstantInt32(Index); | 2860 Constant *Mask = Ctx->getConstantInt32(Index); |
| 2857 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2861 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
| 2858 _pextr(ExtractedElementR, SourceVectR, Mask); | 2862 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2859 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2863 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2860 // Use pshufd and movd/movss. | 2864 // Use pshufd and movd/movss. |
| 2861 Variable *T = nullptr; | 2865 Variable *T = nullptr; |
| 2862 if (Index) { | 2866 if (Index) { |
| 2863 // The shuffle only needs to occur if the element to be extracted | 2867 // The shuffle only needs to occur if the element to be extracted |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2936 } else if (Condition == InstFcmp::False) { | 2940 } else if (Condition == InstFcmp::False) { |
| 2937 T = makeVectorOfZeros(Dest->getType()); | 2941 T = makeVectorOfZeros(Dest->getType()); |
| 2938 } else { | 2942 } else { |
| 2939 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2943 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2940 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2944 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2941 if (llvm::isa<OperandX8632Mem>(Src1RM)) | 2945 if (llvm::isa<OperandX8632Mem>(Src1RM)) |
| 2942 Src1RM = legalizeToVar(Src1RM); | 2946 Src1RM = legalizeToVar(Src1RM); |
| 2943 | 2947 |
| 2944 switch (Condition) { | 2948 switch (Condition) { |
| 2945 default: { | 2949 default: { |
| 2946 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; | 2950 typename Traits::Cond::CmppsCond Predicate = |
| 2947 assert(Predicate != CondX86::Cmpps_Invalid); | 2951 Traits::TableFcmp[Index].Predicate; |
| 2952 assert(Predicate != Traits::Cond::Cmpps_Invalid); |
| 2948 T = makeReg(Src0RM->getType()); | 2953 T = makeReg(Src0RM->getType()); |
| 2949 _movp(T, Src0RM); | 2954 _movp(T, Src0RM); |
| 2950 _cmpps(T, Src1RM, Predicate); | 2955 _cmpps(T, Src1RM, Predicate); |
| 2951 } break; | 2956 } break; |
| 2952 case InstFcmp::One: { | 2957 case InstFcmp::One: { |
| 2953 // Check both unequal and ordered. | 2958 // Check both unequal and ordered. |
| 2954 T = makeReg(Src0RM->getType()); | 2959 T = makeReg(Src0RM->getType()); |
| 2955 Variable *T2 = makeReg(Src0RM->getType()); | 2960 Variable *T2 = makeReg(Src0RM->getType()); |
| 2956 _movp(T, Src0RM); | 2961 _movp(T, Src0RM); |
| 2957 _cmpps(T, Src1RM, CondX86::Cmpps_neq); | 2962 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq); |
| 2958 _movp(T2, Src0RM); | 2963 _movp(T2, Src0RM); |
| 2959 _cmpps(T2, Src1RM, CondX86::Cmpps_ord); | 2964 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord); |
| 2960 _pand(T, T2); | 2965 _pand(T, T2); |
| 2961 } break; | 2966 } break; |
| 2962 case InstFcmp::Ueq: { | 2967 case InstFcmp::Ueq: { |
| 2963 // Check both equal or unordered. | 2968 // Check both equal or unordered. |
| 2964 T = makeReg(Src0RM->getType()); | 2969 T = makeReg(Src0RM->getType()); |
| 2965 Variable *T2 = makeReg(Src0RM->getType()); | 2970 Variable *T2 = makeReg(Src0RM->getType()); |
| 2966 _movp(T, Src0RM); | 2971 _movp(T, Src0RM); |
| 2967 _cmpps(T, Src1RM, CondX86::Cmpps_eq); | 2972 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq); |
| 2968 _movp(T2, Src0RM); | 2973 _movp(T2, Src0RM); |
| 2969 _cmpps(T2, Src1RM, CondX86::Cmpps_unord); | 2974 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord); |
| 2970 _por(T, T2); | 2975 _por(T, T2); |
| 2971 } break; | 2976 } break; |
| 2972 } | 2977 } |
| 2973 } | 2978 } |
| 2974 | 2979 |
| 2975 _movp(Dest, T); | 2980 _movp(Dest, T); |
| 2976 eliminateNextVectorSextInstruction(Dest); | 2981 eliminateNextVectorSextInstruction(Dest); |
| 2977 return; | 2982 return; |
| 2978 } | 2983 } |
| 2979 | 2984 |
| 2980 // Lowering a = fcmp cond, b, c | 2985 // Lowering a = fcmp cond, b, c |
| 2981 // ucomiss b, c /* only if C1 != Br_None */ | 2986 // ucomiss b, c /* only if C1 != Br_None */ |
| 2982 // /* but swap b,c order if SwapOperands==true */ | 2987 // /* but swap b,c order if SwapOperands==true */ |
| 2983 // mov a, <default> | 2988 // mov a, <default> |
| 2984 // j<C1> label /* only if C1 != Br_None */ | 2989 // j<C1> label /* only if C1 != Br_None */ |
| 2985 // j<C2> label /* only if C2 != Br_None */ | 2990 // j<C2> label /* only if C2 != Br_None */ |
| 2986 // FakeUse(a) /* only if C1 != Br_None */ | 2991 // FakeUse(a) /* only if C1 != Br_None */ |
| 2987 // mov a, !<default> /* only if C1 != Br_None */ | 2992 // mov a, !<default> /* only if C1 != Br_None */ |
| 2988 // label: /* only if C1 != Br_None */ | 2993 // label: /* only if C1 != Br_None */ |
| 2989 // | 2994 // |
| 2990 // setcc lowering when C1 != Br_None && C2 == Br_None: | 2995 // setcc lowering when C1 != Br_None && C2 == Br_None: |
| 2991 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ | 2996 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ |
| 2992 // setcc a, C1 | 2997 // setcc a, C1 |
| 2993 InstFcmp::FCond Condition = Inst->getCondition(); | 2998 InstFcmp::FCond Condition = Inst->getCondition(); |
| 2994 size_t Index = static_cast<size_t>(Condition); | 2999 size_t Index = static_cast<size_t>(Condition); |
| 2995 assert(Index < Traits::TableFcmpSize); | 3000 assert(Index < Traits::TableFcmpSize); |
| 2996 if (Traits::TableFcmp[Index].SwapScalarOperands) | 3001 if (Traits::TableFcmp[Index].SwapScalarOperands) |
| 2997 std::swap(Src0, Src1); | 3002 std::swap(Src0, Src1); |
| 2998 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None); | 3003 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); |
| 2999 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None); | 3004 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); |
| 3000 if (HasC1) { | 3005 if (HasC1) { |
| 3001 Src0 = legalize(Src0); | 3006 Src0 = legalize(Src0); |
| 3002 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 3007 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 3003 Variable *T = nullptr; | 3008 Variable *T = nullptr; |
| 3004 _mov(T, Src0); | 3009 _mov(T, Src0); |
| 3005 _ucomiss(T, Src1RM); | 3010 _ucomiss(T, Src1RM); |
| 3006 if (!HasC2) { | 3011 if (!HasC2) { |
| 3007 assert(Traits::TableFcmp[Index].Default); | 3012 assert(Traits::TableFcmp[Index].Default); |
| 3008 _setcc(Dest, Traits::TableFcmp[Index].C1); | 3013 _setcc(Dest, Traits::TableFcmp[Index].C1); |
| 3009 return; | 3014 return; |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3147 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | 3152 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
| 3148 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | 3153 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
| 3149 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 3154 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
| 3150 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 3155 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
| 3151 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3156 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3152 Constant *One = Ctx->getConstantInt32(1); | 3157 Constant *One = Ctx->getConstantInt32(1); |
| 3153 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | 3158 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
| 3154 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | 3159 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
| 3155 _mov(Dest, One); | 3160 _mov(Dest, One); |
| 3156 _cmp(Src0HiRM, Src1HiRI); | 3161 _cmp(Src0HiRM, Src1HiRI); |
| 3157 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None) | 3162 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) |
| 3158 _br(Traits::TableIcmp64[Index].C1, LabelTrue); | 3163 _br(Traits::TableIcmp64[Index].C1, LabelTrue); |
| 3159 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None) | 3164 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) |
| 3160 _br(Traits::TableIcmp64[Index].C2, LabelFalse); | 3165 _br(Traits::TableIcmp64[Index].C2, LabelFalse); |
| 3161 _cmp(Src0LoRM, Src1LoRI); | 3166 _cmp(Src0LoRM, Src1LoRI); |
| 3162 _br(Traits::TableIcmp64[Index].C3, LabelTrue); | 3167 _br(Traits::TableIcmp64[Index].C3, LabelTrue); |
| 3163 Context.insert(LabelFalse); | 3168 Context.insert(LabelFalse); |
| 3164 _mov_nonkillable(Dest, Zero); | 3169 _mov_nonkillable(Dest, Zero); |
| 3165 Context.insert(LabelTrue); | 3170 Context.insert(LabelTrue); |
| 3166 return; | 3171 return; |
| 3167 } | 3172 } |
| 3168 | 3173 |
| 3169 // cmp b, c | 3174 // cmp b, c |
| (...skipping 21 matching lines...) Expand all Loading... |
| 3191 // Expand the element to the appropriate size for it to be inserted | 3196 // Expand the element to the appropriate size for it to be inserted |
| 3192 // in the vector. | 3197 // in the vector. |
| 3193 Variable *Expanded = Func->template makeVariable(InVectorElementTy); | 3198 Variable *Expanded = Func->template makeVariable(InVectorElementTy); |
| 3194 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, | 3199 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, |
| 3195 ElementToInsertNotLegalized); | 3200 ElementToInsertNotLegalized); |
| 3196 lowerCast(Cast); | 3201 lowerCast(Cast); |
| 3197 ElementToInsertNotLegalized = Expanded; | 3202 ElementToInsertNotLegalized = Expanded; |
| 3198 } | 3203 } |
| 3199 | 3204 |
| 3200 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 3205 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 3201 InstructionSet >= Machine::SSE4_1) { | 3206 InstructionSet >= Traits::SSE4_1) { |
| 3202 // Use insertps, pinsrb, pinsrw, or pinsrd. | 3207 // Use insertps, pinsrb, pinsrw, or pinsrd. |
| 3203 Operand *ElementRM = | 3208 Operand *ElementRM = |
| 3204 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 3209 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 3205 Operand *SourceVectRM = | 3210 Operand *SourceVectRM = |
| 3206 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3211 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3207 Variable *T = makeReg(Ty); | 3212 Variable *T = makeReg(Ty); |
| 3208 _movp(T, SourceVectRM); | 3213 _movp(T, SourceVectRM); |
| 3209 if (Ty == IceType_v4f32) | 3214 if (Ty == IceType_v4f32) |
| 3210 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 3215 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 3211 else | 3216 else |
| (...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3605 } | 3610 } |
| 3606 case Intrinsics::Sqrt: { | 3611 case Intrinsics::Sqrt: { |
| 3607 Operand *Src = legalize(Instr->getArg(0)); | 3612 Operand *Src = legalize(Instr->getArg(0)); |
| 3608 Variable *Dest = Instr->getDest(); | 3613 Variable *Dest = Instr->getDest(); |
| 3609 Variable *T = makeReg(Dest->getType()); | 3614 Variable *T = makeReg(Dest->getType()); |
| 3610 _sqrtss(T, Src); | 3615 _sqrtss(T, Src); |
| 3611 _mov(Dest, T); | 3616 _mov(Dest, T); |
| 3612 return; | 3617 return; |
| 3613 } | 3618 } |
| 3614 case Intrinsics::Stacksave: { | 3619 case Intrinsics::Stacksave: { |
| 3615 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 3620 Variable *esp = |
| 3621 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 3616 Variable *Dest = Instr->getDest(); | 3622 Variable *Dest = Instr->getDest(); |
| 3617 _mov(Dest, esp); | 3623 _mov(Dest, esp); |
| 3618 return; | 3624 return; |
| 3619 } | 3625 } |
| 3620 case Intrinsics::Stackrestore: { | 3626 case Intrinsics::Stackrestore: { |
| 3621 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 3627 Variable *esp = |
| 3628 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 3622 _mov_nonkillable(esp, Instr->getArg(0)); | 3629 _mov_nonkillable(esp, Instr->getArg(0)); |
| 3623 return; | 3630 return; |
| 3624 } | 3631 } |
| 3625 case Intrinsics::Trap: | 3632 case Intrinsics::Trap: |
| 3626 _ud2(); | 3633 _ud2(); |
| 3627 return; | 3634 return; |
| 3628 case Intrinsics::UnknownIntrinsic: | 3635 case Intrinsics::UnknownIntrinsic: |
| 3629 Func->setError("Should not be lowering UnknownIntrinsic"); | 3636 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3630 return; | 3637 return; |
| 3631 } | 3638 } |
| 3632 return; | 3639 return; |
| 3633 } | 3640 } |
| 3634 | 3641 |
| 3635 template <class Machine> | 3642 template <class Machine> |
| 3636 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, | 3643 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, |
| 3637 Operand *Ptr, Operand *Expected, | 3644 Operand *Ptr, Operand *Expected, |
| 3638 Operand *Desired) { | 3645 Operand *Desired) { |
| 3639 if (Expected->getType() == IceType_i64) { | 3646 if (Expected->getType() == IceType_i64) { |
| 3640 // Reserve the pre-colored registers first, before adding any more | 3647 // Reserve the pre-colored registers first, before adding any more |
| 3641 // infinite-weight variables from formMemoryOperand's legalization. | 3648 // infinite-weight variables from formMemoryOperand's legalization. |
| 3642 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 3649 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3643 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | 3650 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3644 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 3651 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3645 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | 3652 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3646 _mov(T_eax, loOperand(Expected)); | 3653 _mov(T_eax, loOperand(Expected)); |
| 3647 _mov(T_edx, hiOperand(Expected)); | 3654 _mov(T_edx, hiOperand(Expected)); |
| 3648 _mov(T_ebx, loOperand(Desired)); | 3655 _mov(T_ebx, loOperand(Desired)); |
| 3649 _mov(T_ecx, hiOperand(Desired)); | 3656 _mov(T_ecx, hiOperand(Desired)); |
| 3650 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3657 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3651 const bool Locked = true; | 3658 const bool Locked = true; |
| 3652 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3659 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3653 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3660 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3654 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3661 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3655 _mov(DestLo, T_eax); | 3662 _mov(DestLo, T_eax); |
| 3656 _mov(DestHi, T_edx); | 3663 _mov(DestHi, T_edx); |
| 3657 return; | 3664 return; |
| 3658 } | 3665 } |
| 3659 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); | 3666 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); |
| 3660 _mov(T_eax, Expected); | 3667 _mov(T_eax, Expected); |
| 3661 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | 3668 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); |
| 3662 Variable *DesiredReg = legalizeToVar(Desired); | 3669 Variable *DesiredReg = legalizeToVar(Desired); |
| 3663 const bool Locked = true; | 3670 const bool Locked = true; |
| 3664 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3671 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3665 _mov(DestPrev, T_eax); | 3672 _mov(DestPrev, T_eax); |
| 3666 } | 3673 } |
| 3667 | 3674 |
| 3668 template <class Machine> | 3675 template <class Machine> |
| 3669 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, | 3676 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3720 NextBr->isLastUse(NextCmp->getDest())) { | 3727 NextBr->isLastUse(NextCmp->getDest())) { |
| 3721 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); | 3728 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); |
| 3722 for (size_t i = 0; i < PhiAssigns.size(); ++i) { | 3729 for (size_t i = 0; i < PhiAssigns.size(); ++i) { |
| 3723 // Lower the phi assignments now, before the branch (same placement | 3730 // Lower the phi assignments now, before the branch (same placement |
| 3724 // as before). | 3731 // as before). |
| 3725 InstAssign *PhiAssign = PhiAssigns[i]; | 3732 InstAssign *PhiAssign = PhiAssigns[i]; |
| 3726 PhiAssign->setDeleted(); | 3733 PhiAssign->setDeleted(); |
| 3727 lowerAssign(PhiAssign); | 3734 lowerAssign(PhiAssign); |
| 3728 Context.advanceNext(); | 3735 Context.advanceNext(); |
| 3729 } | 3736 } |
| 3730 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); | 3737 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(), |
| 3738 NextBr->getTargetFalse()); |
| 3731 // Skip over the old compare and branch, by deleting them. | 3739 // Skip over the old compare and branch, by deleting them. |
| 3732 NextCmp->setDeleted(); | 3740 NextCmp->setDeleted(); |
| 3733 NextBr->setDeleted(); | 3741 NextBr->setDeleted(); |
| 3734 Context.advanceNext(); | 3742 Context.advanceNext(); |
| 3735 Context.advanceNext(); | 3743 Context.advanceNext(); |
| 3736 return true; | 3744 return true; |
| 3737 } | 3745 } |
| 3738 } | 3746 } |
| 3739 } | 3747 } |
| 3740 return false; | 3748 return false; |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3851 // mov <reg>, eax | 3859 // mov <reg>, eax |
| 3852 // op <reg>, [desired_adj] | 3860 // op <reg>, [desired_adj] |
| 3853 // lock cmpxchg [ptr], <reg> | 3861 // lock cmpxchg [ptr], <reg> |
| 3854 // jne .LABEL | 3862 // jne .LABEL |
| 3855 // mov <dest>, eax | 3863 // mov <dest>, eax |
| 3856 // | 3864 // |
| 3857 // If Op_{Lo,Hi} are nullptr, then just copy the value. | 3865 // If Op_{Lo,Hi} are nullptr, then just copy the value. |
| 3858 Val = legalize(Val); | 3866 Val = legalize(Val); |
| 3859 Type Ty = Val->getType(); | 3867 Type Ty = Val->getType(); |
| 3860 if (Ty == IceType_i64) { | 3868 if (Ty == IceType_i64) { |
| 3861 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 3869 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 3862 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | 3870 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 3863 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); | 3871 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); |
| 3864 _mov(T_eax, loOperand(Addr)); | 3872 _mov(T_eax, loOperand(Addr)); |
| 3865 _mov(T_edx, hiOperand(Addr)); | 3873 _mov(T_edx, hiOperand(Addr)); |
| 3866 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | 3874 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 3867 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | 3875 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); |
| 3868 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3876 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 3869 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | 3877 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; |
| 3870 if (!IsXchg8b) { | 3878 if (!IsXchg8b) { |
| 3871 Context.insert(Label); | 3879 Context.insert(Label); |
| 3872 _mov(T_ebx, T_eax); | 3880 _mov(T_ebx, T_eax); |
| 3873 (this->*Op_Lo)(T_ebx, loOperand(Val)); | 3881 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
| 3874 _mov(T_ecx, T_edx); | 3882 _mov(T_ecx, T_edx); |
| 3875 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3883 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
| 3876 } else { | 3884 } else { |
| 3877 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3885 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
| 3878 // It just needs the Val loaded into ebx and ecx. | 3886 // It just needs the Val loaded into ebx and ecx. |
| 3879 // That can also be done before the loop. | 3887 // That can also be done before the loop. |
| 3880 _mov(T_ebx, loOperand(Val)); | 3888 _mov(T_ebx, loOperand(Val)); |
| 3881 _mov(T_ecx, hiOperand(Val)); | 3889 _mov(T_ecx, hiOperand(Val)); |
| 3882 Context.insert(Label); | 3890 Context.insert(Label); |
| 3883 } | 3891 } |
| 3884 const bool Locked = true; | 3892 const bool Locked = true; |
| 3885 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3893 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3886 _br(CondX86::Br_ne, Label); | 3894 _br(Traits::Cond::Br_ne, Label); |
| 3887 if (!IsXchg8b) { | 3895 if (!IsXchg8b) { |
| 3888 // If Val is a variable, model the extended live range of Val through | 3896 // If Val is a variable, model the extended live range of Val through |
| 3889 // the end of the loop, since it will be re-used by the loop. | 3897 // the end of the loop, since it will be re-used by the loop. |
| 3890 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3898 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3891 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3899 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
| 3892 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3900 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
| 3893 Context.insert(InstFakeUse::create(Func, ValLo)); | 3901 Context.insert(InstFakeUse::create(Func, ValLo)); |
| 3894 Context.insert(InstFakeUse::create(Func, ValHi)); | 3902 Context.insert(InstFakeUse::create(Func, ValHi)); |
| 3895 } | 3903 } |
| 3896 } else { | 3904 } else { |
| 3897 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3905 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
| 3898 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3906 Context.insert(InstFakeUse::create(Func, T_ebx)); |
| 3899 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3907 Context.insert(InstFakeUse::create(Func, T_ecx)); |
| 3900 } | 3908 } |
| 3901 // The address base (if any) is also reused in the loop. | 3909 // The address base (if any) is also reused in the loop. |
| 3902 if (Variable *Base = Addr->getBase()) | 3910 if (Variable *Base = Addr->getBase()) |
| 3903 Context.insert(InstFakeUse::create(Func, Base)); | 3911 Context.insert(InstFakeUse::create(Func, Base)); |
| 3904 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3912 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3905 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3913 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3906 _mov(DestLo, T_eax); | 3914 _mov(DestLo, T_eax); |
| 3907 _mov(DestHi, T_edx); | 3915 _mov(DestHi, T_edx); |
| 3908 return; | 3916 return; |
| 3909 } | 3917 } |
| 3910 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); | 3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); |
| 3911 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax); | 3919 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); |
| 3912 _mov(T_eax, Addr); | 3920 _mov(T_eax, Addr); |
| 3913 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3921 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 3914 Context.insert(Label); | 3922 Context.insert(Label); |
| 3915 // We want to pick a different register for T than Eax, so don't use | 3923 // We want to pick a different register for T than Eax, so don't use |
| 3916 // _mov(T == nullptr, T_eax). | 3924 // _mov(T == nullptr, T_eax). |
| 3917 Variable *T = makeReg(Ty); | 3925 Variable *T = makeReg(Ty); |
| 3918 _mov(T, T_eax); | 3926 _mov(T, T_eax); |
| 3919 (this->*Op_Lo)(T, Val); | 3927 (this->*Op_Lo)(T, Val); |
| 3920 const bool Locked = true; | 3928 const bool Locked = true; |
| 3921 _cmpxchg(Addr, T_eax, T, Locked); | 3929 _cmpxchg(Addr, T_eax, T, Locked); |
| 3922 _br(CondX86::Br_ne, Label); | 3930 _br(Traits::Cond::Br_ne, Label); |
| 3923 // If Val is a variable, model the extended live range of Val through | 3931 // If Val is a variable, model the extended live range of Val through |
| 3924 // the end of the loop, since it will be re-used by the loop. | 3932 // the end of the loop, since it will be re-used by the loop. |
| 3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3933 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3926 Context.insert(InstFakeUse::create(Func, ValVar)); | 3934 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 3927 } | 3935 } |
| 3928 // The address base (if any) is also reused in the loop. | 3936 // The address base (if any) is also reused in the loop. |
| 3929 if (Variable *Base = Addr->getBase()) | 3937 if (Variable *Base = Addr->getBase()) |
| 3930 Context.insert(InstFakeUse::create(Func, Base)); | 3938 Context.insert(InstFakeUse::create(Func, Base)); |
| 3931 _mov(Dest, T_eax); | 3939 _mov(Dest, T_eax); |
| 3932 } | 3940 } |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3976 } | 3984 } |
| 3977 Variable *T_Dest = makeReg(IceType_i32); | 3985 Variable *T_Dest = makeReg(IceType_i32); |
| 3978 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 3986 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| 3979 Constant *ThirtyOne = Ctx->getConstantInt32(31); | 3987 Constant *ThirtyOne = Ctx->getConstantInt32(31); |
| 3980 if (Cttz) { | 3988 if (Cttz) { |
| 3981 _mov(T_Dest, ThirtyTwo); | 3989 _mov(T_Dest, ThirtyTwo); |
| 3982 } else { | 3990 } else { |
| 3983 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3991 Constant *SixtyThree = Ctx->getConstantInt32(63); |
| 3984 _mov(T_Dest, SixtyThree); | 3992 _mov(T_Dest, SixtyThree); |
| 3985 } | 3993 } |
| 3986 _cmov(T_Dest, T, CondX86::Br_ne); | 3994 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
| 3987 if (!Cttz) { | 3995 if (!Cttz) { |
| 3988 _xor(T_Dest, ThirtyOne); | 3996 _xor(T_Dest, ThirtyOne); |
| 3989 } | 3997 } |
| 3990 if (Ty == IceType_i32) { | 3998 if (Ty == IceType_i32) { |
| 3991 _mov(Dest, T_Dest); | 3999 _mov(Dest, T_Dest); |
| 3992 return; | 4000 return; |
| 3993 } | 4001 } |
| 3994 _add(T_Dest, ThirtyTwo); | 4002 _add(T_Dest, ThirtyTwo); |
| 3995 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4003 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3996 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 4004 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3997 // Will be using "test" on this, so we need a registerized variable. | 4005 // Will be using "test" on this, so we need a registerized variable. |
| 3998 Variable *SecondVar = legalizeToVar(SecondVal); | 4006 Variable *SecondVar = legalizeToVar(SecondVal); |
| 3999 Variable *T_Dest2 = makeReg(IceType_i32); | 4007 Variable *T_Dest2 = makeReg(IceType_i32); |
| 4000 if (Cttz) { | 4008 if (Cttz) { |
| 4001 _bsf(T_Dest2, SecondVar); | 4009 _bsf(T_Dest2, SecondVar); |
| 4002 } else { | 4010 } else { |
| 4003 _bsr(T_Dest2, SecondVar); | 4011 _bsr(T_Dest2, SecondVar); |
| 4004 _xor(T_Dest2, ThirtyOne); | 4012 _xor(T_Dest2, ThirtyOne); |
| 4005 } | 4013 } |
| 4006 _test(SecondVar, SecondVar); | 4014 _test(SecondVar, SecondVar); |
| 4007 _cmov(T_Dest2, T_Dest, CondX86::Br_e); | 4015 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 4008 _mov(DestLo, T_Dest2); | 4016 _mov(DestLo, T_Dest2); |
| 4009 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 4017 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 4010 } | 4018 } |
| 4011 | 4019 |
| 4012 bool isAdd(const Inst *Inst) { | 4020 bool isAdd(const Inst *Inst) { |
| 4013 if (const InstArithmetic *Arith = | 4021 if (const InstArithmetic *Arith = |
| 4014 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 4022 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 4015 return (Arith->getOp() == InstArithmetic::Add); | 4023 return (Arith->getOp() == InstArithmetic::Add); |
| 4016 } | 4024 } |
| 4017 return false; | 4025 return false; |
| (...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4299 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | 4307 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { |
| 4300 Func->setError("Phi found in regular instruction list"); | 4308 Func->setError("Phi found in regular instruction list"); |
| 4301 } | 4309 } |
| 4302 | 4310 |
| 4303 template <class Machine> | 4311 template <class Machine> |
| 4304 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) { | 4312 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) { |
| 4305 Variable *Reg = nullptr; | 4313 Variable *Reg = nullptr; |
| 4306 if (Inst->hasRetValue()) { | 4314 if (Inst->hasRetValue()) { |
| 4307 Operand *Src0 = legalize(Inst->getRetValue()); | 4315 Operand *Src0 = legalize(Inst->getRetValue()); |
| 4308 if (Src0->getType() == IceType_i64) { | 4316 if (Src0->getType() == IceType_i64) { |
| 4309 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); | 4317 Variable *eax = |
| 4310 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); | 4318 legalizeToVar(loOperand(Src0), Traits::RegisterSet::Reg_eax); |
| 4319 Variable *edx = |
| 4320 legalizeToVar(hiOperand(Src0), Traits::RegisterSet::Reg_edx); |
| 4311 Reg = eax; | 4321 Reg = eax; |
| 4312 Context.insert(InstFakeUse::create(Func, edx)); | 4322 Context.insert(InstFakeUse::create(Func, edx)); |
| 4313 } else if (isScalarFloatingType(Src0->getType())) { | 4323 } else if (isScalarFloatingType(Src0->getType())) { |
| 4314 _fld(Src0); | 4324 _fld(Src0); |
| 4315 } else if (isVectorType(Src0->getType())) { | 4325 } else if (isVectorType(Src0->getType())) { |
| 4316 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); | 4326 Reg = legalizeToVar(Src0, Traits::RegisterSet::Reg_xmm0); |
| 4317 } else { | 4327 } else { |
| 4318 _mov(Reg, Src0, RegX8632::Reg_eax); | 4328 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
| 4319 } | 4329 } |
| 4320 } | 4330 } |
| 4321 // Add a ret instruction even if sandboxing is enabled, because | 4331 // Add a ret instruction even if sandboxing is enabled, because |
| 4322 // addEpilog explicitly looks for a ret instruction as a marker for | 4332 // addEpilog explicitly looks for a ret instruction as a marker for |
| 4323 // where to insert the frame removal instructions. | 4333 // where to insert the frame removal instructions. |
| 4324 _ret(Reg); | 4334 _ret(Reg); |
| 4325 // Add a fake use of esp to make sure esp stays alive for the entire | 4335 // Add a fake use of esp to make sure esp stays alive for the entire |
| 4326 // function. Otherwise post-call esp adjustments get dead-code | 4336 // function. Otherwise post-call esp adjustments get dead-code |
| 4327 // eliminated. TODO: Are there more places where the fake use | 4337 // eliminated. TODO: Are there more places where the fake use |
| 4328 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 4338 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| 4329 // have a ret instruction. | 4339 // have a ret instruction. |
| 4330 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 4340 Variable *esp = |
| 4341 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 4331 Context.insert(InstFakeUse::create(Func, esp)); | 4342 Context.insert(InstFakeUse::create(Func, esp)); |
| 4332 } | 4343 } |
| 4333 | 4344 |
| 4334 template <class Machine> | 4345 template <class Machine> |
| 4335 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { | 4346 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| 4336 Variable *Dest = Inst->getDest(); | 4347 Variable *Dest = Inst->getDest(); |
| 4337 Type DestTy = Dest->getType(); | 4348 Type DestTy = Dest->getType(); |
| 4338 Operand *SrcT = Inst->getTrueOperand(); | 4349 Operand *SrcT = Inst->getTrueOperand(); |
| 4339 Operand *SrcF = Inst->getFalseOperand(); | 4350 Operand *SrcF = Inst->getFalseOperand(); |
| 4340 Operand *Condition = Inst->getCondition(); | 4351 Operand *Condition = Inst->getCondition(); |
| 4341 | 4352 |
| 4342 if (isVectorType(DestTy)) { | 4353 if (isVectorType(DestTy)) { |
| 4343 Type SrcTy = SrcT->getType(); | 4354 Type SrcTy = SrcT->getType(); |
| 4344 Variable *T = makeReg(SrcTy); | 4355 Variable *T = makeReg(SrcTy); |
| 4345 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 4356 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 4346 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 4357 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 4347 if (InstructionSet >= Machine::SSE4_1) { | 4358 if (InstructionSet >= Traits::SSE4_1) { |
| 4348 // TODO(wala): If the condition operand is a constant, use blendps | 4359 // TODO(wala): If the condition operand is a constant, use blendps |
| 4349 // or pblendw. | 4360 // or pblendw. |
| 4350 // | 4361 // |
| 4351 // Use blendvps or pblendvb to implement select. | 4362 // Use blendvps or pblendvb to implement select. |
| 4352 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 4363 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 4353 SrcTy == IceType_v4f32) { | 4364 SrcTy == IceType_v4f32) { |
| 4354 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4365 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4355 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 4366 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); |
| 4356 _movp(xmm0, ConditionRM); | 4367 _movp(xmm0, ConditionRM); |
| 4357 _psll(xmm0, Ctx->getConstantInt8(31)); | 4368 _psll(xmm0, Ctx->getConstantInt8(31)); |
| 4358 _movp(T, SrcFRM); | 4369 _movp(T, SrcFRM); |
| 4359 _blendvps(T, SrcTRM, xmm0); | 4370 _blendvps(T, SrcTRM, xmm0); |
| 4360 _movp(Dest, T); | 4371 _movp(Dest, T); |
| 4361 } else { | 4372 } else { |
| 4362 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 4373 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 4363 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 4374 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| 4364 : IceType_v16i8; | 4375 : IceType_v16i8; |
| 4365 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 4376 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0); |
| 4366 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 4377 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 4367 _movp(T, SrcFRM); | 4378 _movp(T, SrcFRM); |
| 4368 _pblendvb(T, SrcTRM, xmm0); | 4379 _pblendvb(T, SrcTRM, xmm0); |
| 4369 _movp(Dest, T); | 4380 _movp(Dest, T); |
| 4370 } | 4381 } |
| 4371 return; | 4382 return; |
| 4372 } | 4383 } |
| 4373 // Lower select without Machine::SSE4.1: | 4384 // Lower select without Traits::SSE4.1: |
| 4374 // a=d?b:c ==> | 4385 // a=d?b:c ==> |
| 4375 // if elementtype(d) != i1: | 4386 // if elementtype(d) != i1: |
| 4376 // d=sext(d); | 4387 // d=sext(d); |
| 4377 // a=(b&d)|(c&~d); | 4388 // a=(b&d)|(c&~d); |
| 4378 Variable *T2 = makeReg(SrcTy); | 4389 Variable *T2 = makeReg(SrcTy); |
| 4379 // Sign extend the condition operand if applicable. | 4390 // Sign extend the condition operand if applicable. |
| 4380 if (SrcTy == IceType_v4f32) { | 4391 if (SrcTy == IceType_v4f32) { |
| 4381 // The sext operation takes only integer arguments. | 4392 // The sext operation takes only integer arguments. |
| 4382 Variable *T3 = Func->template makeVariable(IceType_v4i32); | 4393 Variable *T3 = Func->template makeVariable(IceType_v4i32); |
| 4383 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | 4394 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| 4384 _movp(T, T3); | 4395 _movp(T, T3); |
| 4385 } else if (typeElementType(SrcTy) != IceType_i1) { | 4396 } else if (typeElementType(SrcTy) != IceType_i1) { |
| 4386 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | 4397 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
| 4387 } else { | 4398 } else { |
| 4388 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4399 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 4389 _movp(T, ConditionRM); | 4400 _movp(T, ConditionRM); |
| 4390 } | 4401 } |
| 4391 _movp(T2, T); | 4402 _movp(T2, T); |
| 4392 _pand(T, SrcTRM); | 4403 _pand(T, SrcTRM); |
| 4393 _pandn(T2, SrcFRM); | 4404 _pandn(T2, SrcFRM); |
| 4394 _por(T, T2); | 4405 _por(T, T2); |
| 4395 _movp(Dest, T); | 4406 _movp(Dest, T); |
| 4396 | 4407 |
| 4397 return; | 4408 return; |
| 4398 } | 4409 } |
| 4399 | 4410 |
| 4400 CondX86::BrCond Cond = CondX86::Br_ne; | 4411 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne; |
| 4401 Operand *CmpOpnd0 = nullptr; | 4412 Operand *CmpOpnd0 = nullptr; |
| 4402 Operand *CmpOpnd1 = nullptr; | 4413 Operand *CmpOpnd1 = nullptr; |
| 4403 // Handle folding opportunities. | 4414 // Handle folding opportunities. |
| 4404 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | 4415 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { |
| 4405 assert(Producer->isDeleted()); | 4416 assert(Producer->isDeleted()); |
| 4406 switch (BoolFolding::getProducerKind(Producer)) { | 4417 switch (BoolFolding::getProducerKind(Producer)) { |
| 4407 default: | 4418 default: |
| 4408 break; | 4419 break; |
| 4409 case BoolFolding::PK_Icmp32: { | 4420 case BoolFolding::PK_Icmp32: { |
| 4410 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 4421 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| (...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4535 Src0Hi = legalizeToVar(Src0Hi); | 4546 Src0Hi = legalizeToVar(Src0Hi); |
| 4536 } else { | 4547 } else { |
| 4537 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | 4548 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); |
| 4538 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 4549 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
| 4539 } | 4550 } |
| 4540 for (SizeT I = 0; I < NumCases; ++I) { | 4551 for (SizeT I = 0; I < NumCases; ++I) { |
| 4541 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | 4552 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); |
| 4542 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | 4553 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); |
| 4543 InstX8632Label *Label = InstX8632Label::create(Func, this); | 4554 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 4544 _cmp(Src0Lo, ValueLo); | 4555 _cmp(Src0Lo, ValueLo); |
| 4545 _br(CondX86::Br_ne, Label); | 4556 _br(Traits::Cond::Br_ne, Label); |
| 4546 _cmp(Src0Hi, ValueHi); | 4557 _cmp(Src0Hi, ValueHi); |
| 4547 _br(CondX86::Br_e, Inst->getLabel(I)); | 4558 _br(Traits::Cond::Br_e, Inst->getLabel(I)); |
| 4548 Context.insert(Label); | 4559 Context.insert(Label); |
| 4549 } | 4560 } |
| 4550 _br(Inst->getLabelDefault()); | 4561 _br(Inst->getLabelDefault()); |
| 4551 return; | 4562 return; |
| 4552 } | 4563 } |
| 4553 // OK, we'll be slightly less naive by forcing Src into a physical | 4564 // OK, we'll be slightly less naive by forcing Src into a physical |
| 4554 // register if there are 2 or more uses. | 4565 // register if there are 2 or more uses. |
| 4555 if (NumCases >= 2) | 4566 if (NumCases >= 2) |
| 4556 Src0 = legalizeToVar(Src0); | 4567 Src0 = legalizeToVar(Src0); |
| 4557 else | 4568 else |
| 4558 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | 4569 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); |
| 4559 for (SizeT I = 0; I < NumCases; ++I) { | 4570 for (SizeT I = 0; I < NumCases; ++I) { |
| 4560 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | 4571 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); |
| 4561 _cmp(Src0, Value); | 4572 _cmp(Src0, Value); |
| 4562 _br(CondX86::Br_e, Inst->getLabel(I)); | 4573 _br(Traits::Cond::Br_e, Inst->getLabel(I)); |
| 4563 } | 4574 } |
| 4564 | 4575 |
| 4565 _br(Inst->getLabelDefault()); | 4576 _br(Inst->getLabelDefault()); |
| 4566 } | 4577 } |
| 4567 | 4578 |
| 4568 template <class Machine> | 4579 template <class Machine> |
| 4569 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4580 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4570 Variable *Dest, Operand *Src0, | 4581 Variable *Dest, Operand *Src0, |
| 4571 Operand *Src1) { | 4582 Operand *Src1) { |
| 4572 assert(isVectorType(Dest->getType())); | 4583 assert(isVectorType(Dest->getType())); |
| (...skipping 628 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5201 return; | 5212 return; |
| 5202 inferTwoAddress(); | 5213 inferTwoAddress(); |
| 5203 } | 5214 } |
| 5204 | 5215 |
| 5205 template <class Machine> | 5216 template <class Machine> |
| 5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation( | 5217 void TargetX86Base<Machine>::makeRandomRegisterPermutation( |
| 5207 llvm::SmallVectorImpl<int32_t> &Permutation, | 5218 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 5208 const llvm::SmallBitVector &ExcludeRegisters) const { | 5219 const llvm::SmallBitVector &ExcludeRegisters) const { |
| 5209 // TODO(stichnot): Declaring Permutation this way loses type/size | 5220 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 5210 // information. Fix this in conjunction with the caller-side TODO. | 5221 // information. Fix this in conjunction with the caller-side TODO. |
| 5211 assert(Permutation.size() >= RegX8632::Reg_NUM); | 5222 assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM); |
| 5212 // Expected upper bound on the number of registers in a single | 5223 // Expected upper bound on the number of registers in a single |
| 5213 // equivalence class. For x86-32, this would comprise the 8 XMM | 5224 // equivalence class. For x86-32, this would comprise the 8 XMM |
| 5214 // registers. This is for performance, not correctness. | 5225 // registers. This is for performance, not correctness. |
| 5215 static const unsigned MaxEquivalenceClassSize = 8; | 5226 static const unsigned MaxEquivalenceClassSize = 8; |
| 5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; | 5227 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; |
| 5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; | 5228 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; |
| 5218 EquivalenceClassMap EquivalenceClasses; | 5229 EquivalenceClassMap EquivalenceClasses; |
| 5219 SizeT NumShuffled = 0, NumPreserved = 0; | 5230 SizeT NumShuffled = 0, NumPreserved = 0; |
| 5220 | 5231 |
| 5221 // Build up the equivalence classes of registers by looking at the | 5232 // Build up the equivalence classes of registers by looking at the |
| 5222 // register properties as well as whether the registers should be | 5233 // register properties as well as whether the registers should be |
| 5223 // explicitly excluded from shuffling. | 5234 // explicitly excluded from shuffling. |
| 5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 5235 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 5225 frameptr, isI8, isInt, isFP) \ | 5236 frameptr, isI8, isInt, isFP) \ |
| 5226 if (ExcludeRegisters[RegX8632::val]) { \ | 5237 if (ExcludeRegisters[Traits::RegisterSet::val]) { \ |
| 5227 /* val stays the same in the resulting permutation. */ \ | 5238 /* val stays the same in the resulting permutation. */ \ |
| 5228 Permutation[RegX8632::val] = RegX8632::val; \ | 5239 Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val; \ |
| 5229 ++NumPreserved; \ | 5240 ++NumPreserved; \ |
| 5230 } else { \ | 5241 } else { \ |
| 5231 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \ | 5242 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \ |
| 5232 (isInt << 3) | (isFP << 4); \ | 5243 (isInt << 3) | (isFP << 4); \ |
| 5233 /* val is assigned to an equivalence class based on its properties. */ \ | 5244 /* val is assigned to an equivalence class based on its properties. */ \ |
| 5234 EquivalenceClasses[Index].push_back(RegX8632::val); \ | 5245 EquivalenceClasses[Index].push_back(Traits::RegisterSet::val); \ |
| 5235 } | 5246 } |
| 5236 REGX8632_TABLE | 5247 REGX8632_TABLE |
| 5237 #undef X | 5248 #undef X |
| 5238 | 5249 |
| 5239 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | 5250 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); |
| 5240 | 5251 |
| 5241 // Shuffle the resulting equivalence classes. | 5252 // Shuffle the resulting equivalence classes. |
| 5242 for (auto I : EquivalenceClasses) { | 5253 for (auto I : EquivalenceClasses) { |
| 5243 const RegisterList &List = I.second; | 5254 const RegisterList &List = I.second; |
| 5244 RegisterList Shuffled(List); | 5255 RegisterList Shuffled(List); |
| 5245 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG); | 5256 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG); |
| 5246 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) { | 5257 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) { |
| 5247 Permutation[List[SI]] = Shuffled[SI]; | 5258 Permutation[List[SI]] = Shuffled[SI]; |
| 5248 ++NumShuffled; | 5259 ++NumShuffled; |
| 5249 } | 5260 } |
| 5250 } | 5261 } |
| 5251 | 5262 |
| 5252 assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM); | 5263 assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM); |
| 5253 | 5264 |
| 5254 if (Func->isVerbose(IceV_Random)) { | 5265 if (Func->isVerbose(IceV_Random)) { |
| 5255 OstreamLocker L(Func->getContext()); | 5266 OstreamLocker L(Func->getContext()); |
| 5256 Ostream &Str = Func->getContext()->getStrDump(); | 5267 Ostream &Str = Func->getContext()->getStrDump(); |
| 5257 Str << "Register equivalence classes:\n"; | 5268 Str << "Register equivalence classes:\n"; |
| 5258 for (auto I : EquivalenceClasses) { | 5269 for (auto I : EquivalenceClasses) { |
| 5259 Str << "{"; | 5270 Str << "{"; |
| 5260 const RegisterList &List = I.second; | 5271 const RegisterList &List = I.second; |
| 5261 bool First = true; | 5272 bool First = true; |
| 5262 for (int32_t Register : List) { | 5273 for (int32_t Register : List) { |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5496 } | 5507 } |
| 5497 // the offset is not eligible for blinding or pooling, return the original | 5508 // the offset is not eligible for blinding or pooling, return the original |
| 5498 // mem operand | 5509 // mem operand |
| 5499 return MemOperand; | 5510 return MemOperand; |
| 5500 } | 5511 } |
| 5501 | 5512 |
| 5502 } // end of namespace X86Internal | 5513 } // end of namespace X86Internal |
| 5503 } // end of namespace Ice | 5514 } // end of namespace Ice |
| 5504 | 5515 |
| 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |