Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1216033004: Move X8632-specific Assembler stuff to Machine Traits. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Removes references to X8632-specific registers/condition codes from the base TargetLoweringX86Base. Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX86Base class, which 10 // This file implements the TargetLoweringX86Base class, which
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
170 // optimization because it minimizes branches. 170 // optimization because it minimizes branches.
171 template <class MachineTraits> 171 template <class MachineTraits>
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
173 switch (getProducerKind(Instr)) { 173 switch (getProducerKind(Instr)) {
174 default: 174 default:
175 return false; 175 return false;
176 case PK_Icmp64: 176 case PK_Icmp64:
177 return true; 177 return true;
178 case PK_Fcmp: 178 case PK_Fcmp:
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
180 .C2 != CondX86::Br_None; 180 .C2 != MachineTraits::Cond::Br_None;
181 } 181 }
182 } 182 }
183 183
184 template <class MachineTraits> 184 template <class MachineTraits>
185 void BoolFolding<MachineTraits>::init(CfgNode *Node) { 185 void BoolFolding<MachineTraits>::init(CfgNode *Node) {
186 Producers.clear(); 186 Producers.clear();
187 for (Inst &Instr : Node->getInsts()) { 187 for (Inst &Instr : Node->getInsts()) {
188 // Check whether Instr is a valid producer. 188 // Check whether Instr is a valid producer.
189 Variable *Var = Instr.getDest(); 189 Variable *Var = Instr.getDest();
190 if (!Instr.isDeleted() // only consider non-deleted instructions 190 if (!Instr.isDeleted() // only consider non-deleted instructions
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 if (Func->getContext()->getFlags().getTargetInstructionSet() != 278 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
279 TargetInstructionSet::BaseInstructionSet) { 279 TargetInstructionSet::BaseInstructionSet) {
280 InstructionSet = static_cast<typename Traits::InstructionSet>( 280 InstructionSet = static_cast<typename Traits::InstructionSet>(
281 (Func->getContext()->getFlags().getTargetInstructionSet() - 281 (Func->getContext()->getFlags().getTargetInstructionSet() -
282 TargetInstructionSet::X86InstructionSet_Begin) + 282 TargetInstructionSet::X86InstructionSet_Begin) +
283 Traits::InstructionSet::Begin); 283 Traits::InstructionSet::Begin);
284 } 284 }
285 // TODO: Don't initialize IntegerRegisters and friends every time. 285 // TODO: Don't initialize IntegerRegisters and friends every time.
286 // Instead, initialize in some sort of static initializer for the 286 // Instead, initialize in some sort of static initializer for the
287 // class. 287 // class.
288 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); 288 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
289 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); 289 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
290 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); 290 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
291 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); 291 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
292 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); 292 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
293 ScratchRegs.resize(RegX8632::Reg_NUM); 293 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);
294 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 294 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
295 frameptr, isI8, isInt, isFP) \ 295 frameptr, isI8, isInt, isFP) \
296 IntegerRegisters[RegX8632::val] = isInt; \ 296 IntegerRegisters[Traits::RegisterSet::val] = isInt; \
297 IntegerRegistersI8[RegX8632::val] = isI8; \ 297 IntegerRegistersI8[Traits::RegisterSet::val] = isI8; \
298 FloatRegisters[RegX8632::val] = isFP; \ 298 FloatRegisters[Traits::RegisterSet::val] = isFP; \
299 VectorRegisters[RegX8632::val] = isFP; \ 299 VectorRegisters[Traits::RegisterSet::val] = isFP; \
300 ScratchRegs[RegX8632::val] = scratch; 300 ScratchRegs[Traits::RegisterSet::val] = scratch;
301 REGX8632_TABLE; 301 REGX8632_TABLE;
302 #undef X 302 #undef X
303 TypeToRegisterSet[IceType_void] = InvalidRegisters; 303 TypeToRegisterSet[IceType_void] = InvalidRegisters;
304 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; 304 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
305 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; 305 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
306 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 306 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
307 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 307 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
308 TypeToRegisterSet[IceType_i64] = IntegerRegisters; 308 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
309 TypeToRegisterSet[IceType_f32] = FloatRegisters; 309 TypeToRegisterSet[IceType_f32] = FloatRegisters;
310 TypeToRegisterSet[IceType_f64] = FloatRegisters; 310 TypeToRegisterSet[IceType_f64] = FloatRegisters;
(...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after
733 name, 733 name,
734 REGX8632_TABLE 734 REGX8632_TABLE
735 #undef X 735 #undef X
736 }; 736 };
737 737
738 template <class Machine> 738 template <class Machine>
739 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 739 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
740 if (Ty == IceType_void) 740 if (Ty == IceType_void)
741 Ty = IceType_i32; 741 Ty = IceType_i32;
742 if (PhysicalRegisters[Ty].empty()) 742 if (PhysicalRegisters[Ty].empty())
743 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); 743 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
744 assert(RegNum < PhysicalRegisters[Ty].size()); 744 assert(RegNum < PhysicalRegisters[Ty].size());
745 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 745 Variable *Reg = PhysicalRegisters[Ty][RegNum];
746 if (Reg == nullptr) { 746 if (Reg == nullptr) {
747 Reg = Func->template makeVariable(Ty); 747 Reg = Func->template makeVariable(Ty);
748 Reg->setRegNum(RegNum); 748 Reg->setRegNum(RegNum);
749 PhysicalRegisters[Ty][RegNum] = Reg; 749 PhysicalRegisters[Ty][RegNum] = Reg;
750 // Specially mark esp as an "argument" so that it is considered 750 // Specially mark esp as an "argument" so that it is considered
751 // live upon function entry. 751 // live upon function entry.
752 if (RegNum == RegX8632::Reg_esp) { 752 if (RegNum == Traits::RegisterSet::Reg_esp) {
753 Func->addImplicitArg(Reg); 753 Func->addImplicitArg(Reg);
754 Reg->setIgnoreLiveness(); 754 Reg->setIgnoreLiveness();
755 } 755 }
756 } 756 }
757 return Reg; 757 return Reg;
758 } 758 }
759 759
760 template <class Machine> 760 template <class Machine>
761 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 761 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
762 assert(RegNum < RegX8632::Reg_NUM); 762 assert(RegNum < Traits::RegisterSet::Reg_NUM);
763 static IceString RegNames8[] = { 763 static IceString RegNames8[] = {
764 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 764 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
765 frameptr, isI8, isInt, isFP) \ 765 frameptr, isI8, isInt, isFP) \
766 name8, 766 name8,
767 REGX8632_TABLE 767 REGX8632_TABLE
768 #undef X 768 #undef X
769 }; 769 };
770 static IceString RegNames16[] = { 770 static IceString RegNames16[] = {
771 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 771 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
772 frameptr, isI8, isInt, isFP) \ 772 frameptr, isI8, isInt, isFP) \
(...skipping 25 matching lines...) Expand all
798 int32_t Offset = Var->getStackOffset(); 798 int32_t Offset = Var->getStackOffset();
799 if (!hasFramePointer()) 799 if (!hasFramePointer())
800 Offset += getStackAdjustment(); 800 Offset += getStackAdjustment();
801 if (Offset) 801 if (Offset)
802 Str << Offset; 802 Str << Offset;
803 const Type FrameSPTy = IceType_i32; 803 const Type FrameSPTy = IceType_i32;
804 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; 804 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
805 } 805 }
806 806
807 template <class Machine> 807 template <class Machine>
808 X8632::Address 808 typename TargetX86Base<Machine>::Traits::Address
809 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 809 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
810 if (Var->hasReg()) 810 if (Var->hasReg())
811 llvm_unreachable("Stack Variable has a register assigned"); 811 llvm_unreachable("Stack Variable has a register assigned");
812 if (Var->getWeight().isInf()) { 812 if (Var->getWeight().isInf()) {
813 llvm_unreachable("Infinite-weight Variable has no register assigned"); 813 llvm_unreachable("Infinite-weight Variable has no register assigned");
814 } 814 }
815 int32_t Offset = Var->getStackOffset(); 815 int32_t Offset = Var->getStackOffset();
816 if (!hasFramePointer()) 816 if (!hasFramePointer())
817 Offset += getStackAdjustment(); 817 Offset += getStackAdjustment();
818 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); 818 return typename Traits::Address(
819 Traits::RegisterSet::getEncodedGPR(getFrameOrStackReg()), Offset);
819 } 820 }
820 821
821 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { 822 template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
822 VarList &Args = Func->getArgs(); 823 VarList &Args = Func->getArgs();
823 // The first four arguments of vector type, regardless of their 824 // The first four arguments of vector type, regardless of their
824 // position relative to the other arguments in the argument list, are 825 // position relative to the other arguments in the argument list, are
825 // passed in registers xmm0 - xmm3. 826 // passed in registers xmm0 - xmm3.
826 unsigned NumXmmArgs = 0; 827 unsigned NumXmmArgs = 0;
827 828
828 Context.init(Func->getEntryNode()); 829 Context.init(Func->getEntryNode());
829 Context.setInsertPoint(Context.getCur()); 830 Context.setInsertPoint(Context.getCur());
830 831
831 for (SizeT I = 0, E = Args.size(); 832 for (SizeT I = 0, E = Args.size();
832 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { 833 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
833 Variable *Arg = Args[I]; 834 Variable *Arg = Args[I];
834 Type Ty = Arg->getType(); 835 Type Ty = Arg->getType();
835 if (!isVectorType(Ty)) 836 if (!isVectorType(Ty))
836 continue; 837 continue;
837 // Replace Arg in the argument list with the home register. Then 838 // Replace Arg in the argument list with the home register. Then
838 // generate an instruction in the prolog to copy the home register 839 // generate an instruction in the prolog to copy the home register
839 // to the assigned location of Arg. 840 // to the assigned location of Arg.
840 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; 841 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
841 ++NumXmmArgs; 842 ++NumXmmArgs;
842 Variable *RegisterArg = Func->template makeVariable(Ty); 843 Variable *RegisterArg = Func->template makeVariable(Ty);
843 if (BuildDefs::dump()) 844 if (BuildDefs::dump())
844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 845 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
845 RegisterArg->setRegNum(RegNum); 846 RegisterArg->setRegNum(RegNum);
846 RegisterArg->setIsArg(); 847 RegisterArg->setIsArg();
847 Arg->setIsArg(false); 848 Arg->setIsArg(false);
848 849
849 Args[I] = RegisterArg; 850 Args[I] = RegisterArg;
850 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 851 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
990 _push(getPhysicalRegister(i)); 991 _push(getPhysicalRegister(i));
991 } 992 }
992 } 993 }
993 Ctx->statsUpdateRegistersSaved(NumCallee); 994 Ctx->statsUpdateRegistersSaved(NumCallee);
994 995
995 // Generate "push ebp; mov ebp, esp" 996 // Generate "push ebp; mov ebp, esp"
996 if (IsEbpBasedFrame) { 997 if (IsEbpBasedFrame) {
997 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 998 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
998 .count() == 0); 999 .count() == 0);
999 PreservedRegsSizeBytes += 4; 1000 PreservedRegsSizeBytes += 4;
1000 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); 1001 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
1001 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 1002 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
1002 _push(ebp); 1003 _push(ebp);
1003 _mov(ebp, esp); 1004 _mov(ebp, esp);
1004 // Keep ebp live for late-stage liveness analysis 1005 // Keep ebp live for late-stage liveness analysis
1005 // (e.g. asm-verbose mode). 1006 // (e.g. asm-verbose mode).
1006 Context.insert(InstFakeUse::create(Func, ebp)); 1007 Context.insert(InstFakeUse::create(Func, ebp));
1007 } 1008 }
1008 1009
1009 // Align the variables area. SpillAreaPaddingBytes is the size of 1010 // Align the variables area. SpillAreaPaddingBytes is the size of
1010 // the region after the preserved registers and before the spill areas. 1011 // the region after the preserved registers and before the spill areas.
1011 // LocalsSlotsPaddingBytes is the amount of padding between the globals 1012 // LocalsSlotsPaddingBytes is the amount of padding between the globals
(...skipping 14 matching lines...) Expand all
1026 if (NeedsStackAlignment) { 1027 if (NeedsStackAlignment) {
1027 uint32_t StackOffset = 1028 uint32_t StackOffset =
1028 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 1029 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
1029 uint32_t StackSize = 1030 uint32_t StackSize =
1030 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); 1031 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1031 SpillAreaSizeBytes = StackSize - StackOffset; 1032 SpillAreaSizeBytes = StackSize - StackOffset;
1032 } 1033 }
1033 1034
1034 // Generate "sub esp, SpillAreaSizeBytes" 1035 // Generate "sub esp, SpillAreaSizeBytes"
1035 if (SpillAreaSizeBytes) 1036 if (SpillAreaSizeBytes)
1036 _sub(getPhysicalRegister(RegX8632::Reg_esp), 1037 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
1037 Ctx->getConstantInt32(SpillAreaSizeBytes)); 1038 Ctx->getConstantInt32(SpillAreaSizeBytes));
1038 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 1039 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1039 1040
1040 resetStackAdjustment(); 1041 resetStackAdjustment();
1041 1042
1042 // Fill in stack offsets for stack args, and copy args into registers 1043 // Fill in stack offsets for stack args, and copy args into registers
1043 // for those that were register-allocated. Args are pushed right to 1044 // for those that were register-allocated. Args are pushed right to
1044 // left, so Arg[0] is closest to the stack/frame pointer. 1045 // left, so Arg[0] is closest to the stack/frame pointer.
1045 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 1046 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1046 size_t BasicFrameOffset = 1047 size_t BasicFrameOffset =
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
1110 if (RI == E) 1111 if (RI == E)
1111 return; 1112 return;
1112 1113
1113 // Convert the reverse_iterator position into its corresponding 1114 // Convert the reverse_iterator position into its corresponding
1114 // (forward) iterator position. 1115 // (forward) iterator position.
1115 InstList::iterator InsertPoint = RI.base(); 1116 InstList::iterator InsertPoint = RI.base();
1116 --InsertPoint; 1117 --InsertPoint;
1117 Context.init(Node); 1118 Context.init(Node);
1118 Context.setInsertPoint(InsertPoint); 1119 Context.setInsertPoint(InsertPoint);
1119 1120
1120 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 1121 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
1121 if (IsEbpBasedFrame) { 1122 if (IsEbpBasedFrame) {
1122 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); 1123 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
1123 // For late-stage liveness analysis (e.g. asm-verbose mode), 1124 // For late-stage liveness analysis (e.g. asm-verbose mode),
1124 // adding a fake use of esp before the assignment of esp=ebp keeps 1125 // adding a fake use of esp before the assignment of esp=ebp keeps
1125 // previous esp adjustments from being dead-code eliminated. 1126 // previous esp adjustments from being dead-code eliminated.
1126 Context.insert(InstFakeUse::create(Func, esp)); 1127 Context.insert(InstFakeUse::create(Func, esp));
1127 _mov(esp, ebp); 1128 _mov(esp, ebp);
1128 _pop(ebp); 1129 _pop(ebp);
1129 } else { 1130 } else {
1130 // add esp, SpillAreaSizeBytes 1131 // add esp, SpillAreaSizeBytes
1131 if (SpillAreaSizeBytes) 1132 if (SpillAreaSizeBytes)
1132 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); 1133 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
1133 } 1134 }
1134 1135
1135 // Add pop instructions for preserved registers. 1136 // Add pop instructions for preserved registers.
1136 llvm::SmallBitVector CalleeSaves = 1137 llvm::SmallBitVector CalleeSaves =
1137 getRegisterSet(RegSet_CalleeSave, RegSet_None); 1138 getRegisterSet(RegSet_CalleeSave, RegSet_None);
1138 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 1139 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1139 SizeT j = CalleeSaves.size() - i - 1; 1140 SizeT j = CalleeSaves.size() - i - 1;
1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) 1141 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
1141 continue; 1142 continue;
1142 if (CalleeSaves[j] && RegsUsed[j]) { 1143 if (CalleeSaves[j] && RegsUsed[j]) {
1143 _pop(getPhysicalRegister(j)); 1144 _pop(getPhysicalRegister(j));
1144 } 1145 }
1145 } 1146 }
1146 1147
1147 if (!Ctx->getFlags().getUseSandboxing()) 1148 if (!Ctx->getFlags().getUseSandboxing())
1148 return; 1149 return;
1149 // Change the original ret instruction into a sandboxed return sequence. 1150 // Change the original ret instruction into a sandboxed return sequence.
1150 // t:ecx = pop 1151 // t:ecx = pop
1151 // bundle_lock 1152 // bundle_lock
1152 // and t, ~31 1153 // and t, ~31
1153 // jmp *t 1154 // jmp *t
1154 // bundle_unlock 1155 // bundle_unlock
1155 // FakeUse <original_ret_operand> 1156 // FakeUse <original_ret_operand>
1156 const SizeT BundleSize = 1157 const SizeT BundleSize =
1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); 1158 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 1159 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
1159 _pop(T_ecx); 1160 _pop(T_ecx);
1160 _bundle_lock(); 1161 _bundle_lock();
1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); 1162 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
1162 _jmp(T_ecx); 1163 _jmp(T_ecx);
1163 _bundle_unlock(); 1164 _bundle_unlock();
1164 if (RI->getSrcSize()) { 1165 if (RI->getSrcSize()) {
1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1166 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
1166 Context.insert(InstFakeUse::create(Func, RetValue)); 1167 Context.insert(InstFakeUse::create(Func, RetValue));
1167 } 1168 }
1168 RI->setDeleted(); 1169 RI->setDeleted();
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1266 return legalize(MemOperand); 1267 return legalize(MemOperand);
1267 } 1268 }
1268 llvm_unreachable("Unsupported operand type"); 1269 llvm_unreachable("Unsupported operand type");
1269 return nullptr; 1270 return nullptr;
1270 } 1271 }
1271 1272
1272 template <class Machine> 1273 template <class Machine>
1273 llvm::SmallBitVector 1274 llvm::SmallBitVector
1274 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 1275 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
1275 RegSetMask Exclude) const { 1276 RegSetMask Exclude) const {
1276 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); 1277 llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM);
1277 1278
1278 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 1279 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1279 frameptr, isI8, isInt, isFP) \ 1280 frameptr, isI8, isInt, isFP) \
1280 if (scratch && (Include & RegSet_CallerSave)) \ 1281 if (scratch && (Include & RegSet_CallerSave)) \
1281 Registers[RegX8632::val] = true; \ 1282 Registers[Traits::RegisterSet::val] = true; \
1282 if (preserved && (Include & RegSet_CalleeSave)) \ 1283 if (preserved && (Include & RegSet_CalleeSave)) \
1283 Registers[RegX8632::val] = true; \ 1284 Registers[Traits::RegisterSet::val] = true; \
1284 if (stackptr && (Include & RegSet_StackPointer)) \ 1285 if (stackptr && (Include & RegSet_StackPointer)) \
1285 Registers[RegX8632::val] = true; \ 1286 Registers[Traits::RegisterSet::val] = true; \
1286 if (frameptr && (Include & RegSet_FramePointer)) \ 1287 if (frameptr && (Include & RegSet_FramePointer)) \
1287 Registers[RegX8632::val] = true; \ 1288 Registers[Traits::RegisterSet::val] = true; \
1288 if (scratch && (Exclude & RegSet_CallerSave)) \ 1289 if (scratch && (Exclude & RegSet_CallerSave)) \
1289 Registers[RegX8632::val] = false; \ 1290 Registers[Traits::RegisterSet::val] = false; \
1290 if (preserved && (Exclude & RegSet_CalleeSave)) \ 1291 if (preserved && (Exclude & RegSet_CalleeSave)) \
1291 Registers[RegX8632::val] = false; \ 1292 Registers[Traits::RegisterSet::val] = false; \
1292 if (stackptr && (Exclude & RegSet_StackPointer)) \ 1293 if (stackptr && (Exclude & RegSet_StackPointer)) \
1293 Registers[RegX8632::val] = false; \ 1294 Registers[Traits::RegisterSet::val] = false; \
1294 if (frameptr && (Exclude & RegSet_FramePointer)) \ 1295 if (frameptr && (Exclude & RegSet_FramePointer)) \
1295 Registers[RegX8632::val] = false; 1296 Registers[Traits::RegisterSet::val] = false;
1296 1297
1297 REGX8632_TABLE 1298 REGX8632_TABLE
1298 1299
1299 #undef X 1300 #undef X
1300 1301
1301 return Registers; 1302 return Registers;
1302 } 1303 }
1303 1304
1304 template <class Machine> 1305 template <class Machine>
1305 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 1306 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
1306 IsEbpBasedFrame = true; 1307 IsEbpBasedFrame = true;
1307 // Conservatively require the stack to be aligned. Some stack 1308 // Conservatively require the stack to be aligned. Some stack
1308 // adjustment operations implemented below assume that the stack is 1309 // adjustment operations implemented below assume that the stack is
1309 // aligned before the alloca. All the alloca code ensures that the 1310 // aligned before the alloca. All the alloca code ensures that the
1310 // stack alignment is preserved after the alloca. The stack alignment 1311 // stack alignment is preserved after the alloca. The stack alignment
1311 // restriction can be relaxed in some cases. 1312 // restriction can be relaxed in some cases.
1312 NeedsStackAlignment = true; 1313 NeedsStackAlignment = true;
1313 1314
1314 // TODO(stichnot): minimize the number of adjustments of esp, etc. 1315 // TODO(stichnot): minimize the number of adjustments of esp, etc.
1315 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 1316 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
1316 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1317 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1317 Variable *Dest = Inst->getDest(); 1318 Variable *Dest = Inst->getDest();
1318 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1319 uint32_t AlignmentParam = Inst->getAlignInBytes();
1319 // For default align=0, set it to the real value 1, to avoid any 1320 // For default align=0, set it to the real value 1, to avoid any
1320 // bit-manipulation problems below. 1321 // bit-manipulation problems below.
1321 AlignmentParam = std::max(AlignmentParam, 1u); 1322 AlignmentParam = std::max(AlignmentParam, 1u);
1322 1323
1323 // LLVM enforces power of 2 alignment. 1324 // LLVM enforces power of 2 alignment.
1324 assert(llvm::isPowerOf2_32(AlignmentParam)); 1325 assert(llvm::isPowerOf2_32(AlignmentParam));
1325 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 1326 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
1544 case InstArithmetic::Sub: 1545 case InstArithmetic::Sub:
1545 _mov(T_Lo, Src0Lo); 1546 _mov(T_Lo, Src0Lo);
1546 _sub(T_Lo, Src1Lo); 1547 _sub(T_Lo, Src1Lo);
1547 _mov(DestLo, T_Lo); 1548 _mov(DestLo, T_Lo);
1548 _mov(T_Hi, Src0Hi); 1549 _mov(T_Hi, Src0Hi);
1549 _sbb(T_Hi, Src1Hi); 1550 _sbb(T_Hi, Src1Hi);
1550 _mov(DestHi, T_Hi); 1551 _mov(DestHi, T_Hi);
1551 break; 1552 break;
1552 case InstArithmetic::Mul: { 1553 case InstArithmetic::Mul: {
1553 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1554 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1554 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax); 1555 Variable *T_4Lo = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1555 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx); 1556 Variable *T_4Hi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
1556 // gcc does the following: 1557 // gcc does the following:
1557 // a=b*c ==> 1558 // a=b*c ==>
1558 // t1 = b.hi; t1 *=(imul) c.lo 1559 // t1 = b.hi; t1 *=(imul) c.lo
1559 // t2 = c.hi; t2 *=(imul) b.lo 1560 // t2 = c.hi; t2 *=(imul) b.lo
1560 // t3:eax = b.lo 1561 // t3:eax = b.lo
1561 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo 1562 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1562 // a.lo = t4.lo 1563 // a.lo = t4.lo
1563 // t4.hi += t1 1564 // t4.hi += t1
1564 // t4.hi += t2 1565 // t4.hi += t2
1565 // a.hi = t4.hi 1566 // a.hi = t4.hi
1566 // The mul instruction cannot take an immediate operand. 1567 // The mul instruction cannot take an immediate operand.
1567 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); 1568 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1568 _mov(T_1, Src0Hi); 1569 _mov(T_1, Src0Hi);
1569 _imul(T_1, Src1Lo); 1570 _imul(T_1, Src1Lo);
1570 _mov(T_2, Src1Hi); 1571 _mov(T_2, Src1Hi);
1571 _imul(T_2, Src0Lo); 1572 _imul(T_2, Src0Lo);
1572 _mov(T_3, Src0Lo, RegX8632::Reg_eax); 1573 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
1573 _mul(T_4Lo, T_3, Src1Lo); 1574 _mul(T_4Lo, T_3, Src1Lo);
1574 // The mul instruction produces two dest variables, edx:eax. We 1575 // The mul instruction produces two dest variables, edx:eax. We
1575 // create a fake definition of edx to account for this. 1576 // create a fake definition of edx to account for this.
1576 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); 1577 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1577 _mov(DestLo, T_4Lo); 1578 _mov(DestLo, T_4Lo);
1578 _add(T_4Hi, T_1); 1579 _add(T_4Hi, T_1);
1579 _add(T_4Hi, T_2); 1580 _add(T_4Hi, T_2);
1580 _mov(DestHi, T_4Hi); 1581 _mov(DestHi, T_4Hi);
1581 } break; 1582 } break;
1582 case InstArithmetic::Shl: { 1583 case InstArithmetic::Shl: {
(...skipping 10 matching lines...) Expand all
1593 // use(t3) 1594 // use(t3)
1594 // t3 = t2 1595 // t3 = t2
1595 // t2 = 0 1596 // t2 = 0
1596 // L1: 1597 // L1:
1597 // a.lo = t2 1598 // a.lo = t2
1598 // a.hi = t3 1599 // a.hi = t3
1599 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1600 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1600 Constant *BitTest = Ctx->getConstantInt32(0x20); 1601 Constant *BitTest = Ctx->getConstantInt32(0x20);
1601 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1602 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1602 InstX8632Label *Label = InstX8632Label::create(Func, this); 1603 InstX8632Label *Label = InstX8632Label::create(Func, this);
1603 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1604 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1604 _mov(T_2, Src0Lo); 1605 _mov(T_2, Src0Lo);
1605 _mov(T_3, Src0Hi); 1606 _mov(T_3, Src0Hi);
1606 _shld(T_3, T_2, T_1); 1607 _shld(T_3, T_2, T_1);
1607 _shl(T_2, T_1); 1608 _shl(T_2, T_1);
1608 _test(T_1, BitTest); 1609 _test(T_1, BitTest);
1609 _br(CondX86::Br_e, Label); 1610 _br(Traits::Cond::Br_e, Label);
1610 // T_2 and T_3 are being assigned again because of the 1611 // T_2 and T_3 are being assigned again because of the
1611 // intra-block control flow, so we need the _mov_nonkillable 1612 // intra-block control flow, so we need the _mov_nonkillable
1612 // variant to avoid liveness problems. 1613 // variant to avoid liveness problems.
1613 _mov_nonkillable(T_3, T_2); 1614 _mov_nonkillable(T_3, T_2);
1614 _mov_nonkillable(T_2, Zero); 1615 _mov_nonkillable(T_2, Zero);
1615 Context.insert(Label); 1616 Context.insert(Label);
1616 _mov(DestLo, T_2); 1617 _mov(DestLo, T_2);
1617 _mov(DestHi, T_3); 1618 _mov(DestHi, T_3);
1618 } break; 1619 } break;
1619 case InstArithmetic::Lshr: { 1620 case InstArithmetic::Lshr: {
1620 // a=b>>c (unsigned) ==> 1621 // a=b>>c (unsigned) ==>
1621 // t1:ecx = c.lo & 0xff 1622 // t1:ecx = c.lo & 0xff
1622 // t2 = b.lo 1623 // t2 = b.lo
1623 // t3 = b.hi 1624 // t3 = b.hi
1624 // t2 = shrd t2, t3, t1 1625 // t2 = shrd t2, t3, t1
1625 // t3 = shr t3, t1 1626 // t3 = shr t3, t1
1626 // test t1, 0x20 1627 // test t1, 0x20
1627 // je L1 1628 // je L1
1628 // use(t2) 1629 // use(t2)
1629 // t2 = t3 1630 // t2 = t3
1630 // t3 = 0 1631 // t3 = 0
1631 // L1: 1632 // L1:
1632 // a.lo = t2 1633 // a.lo = t2
1633 // a.hi = t3 1634 // a.hi = t3
1634 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1635 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1635 Constant *BitTest = Ctx->getConstantInt32(0x20); 1636 Constant *BitTest = Ctx->getConstantInt32(0x20);
1636 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1637 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1637 InstX8632Label *Label = InstX8632Label::create(Func, this); 1638 InstX8632Label *Label = InstX8632Label::create(Func, this);
1638 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1639 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1639 _mov(T_2, Src0Lo); 1640 _mov(T_2, Src0Lo);
1640 _mov(T_3, Src0Hi); 1641 _mov(T_3, Src0Hi);
1641 _shrd(T_2, T_3, T_1); 1642 _shrd(T_2, T_3, T_1);
1642 _shr(T_3, T_1); 1643 _shr(T_3, T_1);
1643 _test(T_1, BitTest); 1644 _test(T_1, BitTest);
1644 _br(CondX86::Br_e, Label); 1645 _br(Traits::Cond::Br_e, Label);
1645 // T_2 and T_3 are being assigned again because of the 1646 // T_2 and T_3 are being assigned again because of the
1646 // intra-block control flow, so we need the _mov_nonkillable 1647 // intra-block control flow, so we need the _mov_nonkillable
1647 // variant to avoid liveness problems. 1648 // variant to avoid liveness problems.
1648 _mov_nonkillable(T_2, T_3); 1649 _mov_nonkillable(T_2, T_3);
1649 _mov_nonkillable(T_3, Zero); 1650 _mov_nonkillable(T_3, Zero);
1650 Context.insert(Label); 1651 Context.insert(Label);
1651 _mov(DestLo, T_2); 1652 _mov(DestLo, T_2);
1652 _mov(DestHi, T_3); 1653 _mov(DestHi, T_3);
1653 } break; 1654 } break;
1654 case InstArithmetic::Ashr: { 1655 case InstArithmetic::Ashr: {
1655 // a=b>>c (signed) ==> 1656 // a=b>>c (signed) ==>
1656 // t1:ecx = c.lo & 0xff 1657 // t1:ecx = c.lo & 0xff
1657 // t2 = b.lo 1658 // t2 = b.lo
1658 // t3 = b.hi 1659 // t3 = b.hi
1659 // t2 = shrd t2, t3, t1 1660 // t2 = shrd t2, t3, t1
1660 // t3 = sar t3, t1 1661 // t3 = sar t3, t1
1661 // test t1, 0x20 1662 // test t1, 0x20
1662 // je L1 1663 // je L1
1663 // use(t2) 1664 // use(t2)
1664 // t2 = t3 1665 // t2 = t3
1665 // t3 = sar t3, 0x1f 1666 // t3 = sar t3, 0x1f
1666 // L1: 1667 // L1:
1667 // a.lo = t2 1668 // a.lo = t2
1668 // a.hi = t3 1669 // a.hi = t3
1669 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1670 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1670 Constant *BitTest = Ctx->getConstantInt32(0x20); 1671 Constant *BitTest = Ctx->getConstantInt32(0x20);
1671 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1672 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1672 InstX8632Label *Label = InstX8632Label::create(Func, this); 1673 InstX8632Label *Label = InstX8632Label::create(Func, this);
1673 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1674 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1674 _mov(T_2, Src0Lo); 1675 _mov(T_2, Src0Lo);
1675 _mov(T_3, Src0Hi); 1676 _mov(T_3, Src0Hi);
1676 _shrd(T_2, T_3, T_1); 1677 _shrd(T_2, T_3, T_1);
1677 _sar(T_3, T_1); 1678 _sar(T_3, T_1);
1678 _test(T_1, BitTest); 1679 _test(T_1, BitTest);
1679 _br(CondX86::Br_e, Label); 1680 _br(Traits::Cond::Br_e, Label);
1680 // T_2 and T_3 are being assigned again because of the 1681 // T_2 and T_3 are being assigned again because of the
1681 // intra-block control flow, so T_2 needs the _mov_nonkillable 1682 // intra-block control flow, so T_2 needs the _mov_nonkillable
1682 // variant to avoid liveness problems. T_3 doesn't need special 1683 // variant to avoid liveness problems. T_3 doesn't need special
1683 // treatment because it is reassigned via _sar instead of _mov. 1684 // treatment because it is reassigned via _sar instead of _mov.
1684 _mov_nonkillable(T_2, T_3); 1685 _mov_nonkillable(T_2, T_3);
1685 _sar(T_3, SignExtend); 1686 _sar(T_3, SignExtend);
1686 Context.insert(Label); 1687 Context.insert(Label);
1687 _mov(DestLo, T_2); 1688 _mov(DestLo, T_2);
1688 _mov(DestHi, T_3); 1689 _mov(DestHi, T_3);
1689 } break; 1690 } break;
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
1740 case InstArithmetic::Sub: { 1741 case InstArithmetic::Sub: {
1741 Variable *T = makeReg(Dest->getType()); 1742 Variable *T = makeReg(Dest->getType());
1742 _movp(T, Src0); 1743 _movp(T, Src0);
1743 _psub(T, Src1); 1744 _psub(T, Src1);
1744 _movp(Dest, T); 1745 _movp(Dest, T);
1745 } break; 1746 } break;
1746 case InstArithmetic::Mul: { 1747 case InstArithmetic::Mul: {
1747 bool TypesAreValidForPmull = 1748 bool TypesAreValidForPmull =
1748 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1749 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1749 bool InstructionSetIsValidForPmull = 1750 bool InstructionSetIsValidForPmull =
1750 Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1; 1751 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
1751 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1752 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1752 Variable *T = makeReg(Dest->getType()); 1753 Variable *T = makeReg(Dest->getType());
1753 _movp(T, Src0); 1754 _movp(T, Src0);
1754 _pmull(T, Src1); 1755 _pmull(T, Src1);
1755 _movp(Dest, T); 1756 _movp(Dest, T);
1756 } else if (Dest->getType() == IceType_v4i32) { 1757 } else if (Dest->getType() == IceType_v4i32) {
1757 // Lowering sequence: 1758 // Lowering sequence:
1758 // Note: The mask arguments have index 0 on the left. 1759 // Note: The mask arguments have index 0 on the left.
1759 // 1760 //
1760 // movups T1, Src0 1761 // movups T1, Src0
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
1867 _mov(Dest, T); 1868 _mov(Dest, T);
1868 break; 1869 break;
1869 case InstArithmetic::Mul: 1870 case InstArithmetic::Mul:
1870 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1871 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1871 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1872 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1872 return; 1873 return;
1873 } 1874 }
1874 // The 8-bit version of imul only allows the form "imul r/m8" 1875 // The 8-bit version of imul only allows the form "imul r/m8"
1875 // where T must be in eax. 1876 // where T must be in eax.
1876 if (isByteSizedArithType(Dest->getType())) { 1877 if (isByteSizedArithType(Dest->getType())) {
1877 _mov(T, Src0, RegX8632::Reg_eax); 1878 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1878 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1879 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1879 } else { 1880 } else {
1880 _mov(T, Src0); 1881 _mov(T, Src0);
1881 } 1882 }
1882 _imul(T, Src1); 1883 _imul(T, Src1);
1883 _mov(Dest, T); 1884 _mov(Dest, T);
1884 break; 1885 break;
1885 case InstArithmetic::Shl: 1886 case InstArithmetic::Shl:
1886 _mov(T, Src0); 1887 _mov(T, Src0);
1887 if (!llvm::isa<Constant>(Src1)) 1888 if (!llvm::isa<Constant>(Src1))
1888 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1889 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
1889 _shl(T, Src1); 1890 _shl(T, Src1);
1890 _mov(Dest, T); 1891 _mov(Dest, T);
1891 break; 1892 break;
1892 case InstArithmetic::Lshr: 1893 case InstArithmetic::Lshr:
1893 _mov(T, Src0); 1894 _mov(T, Src0);
1894 if (!llvm::isa<Constant>(Src1)) 1895 if (!llvm::isa<Constant>(Src1))
1895 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1896 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
1896 _shr(T, Src1); 1897 _shr(T, Src1);
1897 _mov(Dest, T); 1898 _mov(Dest, T);
1898 break; 1899 break;
1899 case InstArithmetic::Ashr: 1900 case InstArithmetic::Ashr:
1900 _mov(T, Src0); 1901 _mov(T, Src0);
1901 if (!llvm::isa<Constant>(Src1)) 1902 if (!llvm::isa<Constant>(Src1))
1902 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1903 Src1 = legalizeToVar(Src1, Traits::RegisterSet::Reg_ecx);
1903 _sar(T, Src1); 1904 _sar(T, Src1);
1904 _mov(Dest, T); 1905 _mov(Dest, T);
1905 break; 1906 break;
1906 case InstArithmetic::Udiv: 1907 case InstArithmetic::Udiv:
1907 // div and idiv are the few arithmetic operators that do not allow 1908 // div and idiv are the few arithmetic operators that do not allow
1908 // immediates as the operand. 1909 // immediates as the operand.
1909 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1910 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1910 if (isByteSizedArithType(Dest->getType())) { 1911 if (isByteSizedArithType(Dest->getType())) {
1911 Variable *T_ah = nullptr; 1912 Variable *T_ah = nullptr;
1912 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1913 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1913 _mov(T, Src0, RegX8632::Reg_eax); 1914 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1914 _mov(T_ah, Zero, RegX8632::Reg_ah); 1915 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
1915 _div(T, Src1, T_ah); 1916 _div(T, Src1, T_ah);
1916 _mov(Dest, T); 1917 _mov(Dest, T);
1917 } else { 1918 } else {
1918 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1919 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1919 _mov(T, Src0, RegX8632::Reg_eax); 1920 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1920 _mov(T_edx, Zero, RegX8632::Reg_edx); 1921 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
1921 _div(T, Src1, T_edx); 1922 _div(T, Src1, T_edx);
1922 _mov(Dest, T); 1923 _mov(Dest, T);
1923 } 1924 }
1924 break; 1925 break;
1925 case InstArithmetic::Sdiv: 1926 case InstArithmetic::Sdiv:
1926 // TODO(stichnot): Enable this after doing better performance 1927 // TODO(stichnot): Enable this after doing better performance
1927 // and cross testing. 1928 // and cross testing.
1928 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1929 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1929 // Optimize division by constant power of 2, but not for Om1 1930 // Optimize division by constant power of 2, but not for Om1
1930 // or O0, just to keep things simple there. 1931 // or O0, just to keep things simple there.
(...skipping 22 matching lines...) Expand all
1953 _add(T, Src0); 1954 _add(T, Src0);
1954 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1955 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1955 } 1956 }
1956 _mov(Dest, T); 1957 _mov(Dest, T);
1957 return; 1958 return;
1958 } 1959 }
1959 } 1960 }
1960 } 1961 }
1961 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1962 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1962 if (isByteSizedArithType(Dest->getType())) { 1963 if (isByteSizedArithType(Dest->getType())) {
1963 _mov(T, Src0, RegX8632::Reg_eax); 1964 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1964 _cbwdq(T, T); 1965 _cbwdq(T, T);
1965 _idiv(T, Src1, T); 1966 _idiv(T, Src1, T);
1966 _mov(Dest, T); 1967 _mov(Dest, T);
1967 } else { 1968 } else {
1968 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 1969 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
1969 _mov(T, Src0, RegX8632::Reg_eax); 1970 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1970 _cbwdq(T_edx, T); 1971 _cbwdq(T_edx, T);
1971 _idiv(T, Src1, T_edx); 1972 _idiv(T, Src1, T_edx);
1972 _mov(Dest, T); 1973 _mov(Dest, T);
1973 } 1974 }
1974 break; 1975 break;
1975 case InstArithmetic::Urem: 1976 case InstArithmetic::Urem:
1976 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1977 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1977 if (isByteSizedArithType(Dest->getType())) { 1978 if (isByteSizedArithType(Dest->getType())) {
1978 Variable *T_ah = nullptr; 1979 Variable *T_ah = nullptr;
1979 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1980 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1980 _mov(T, Src0, RegX8632::Reg_eax); 1981 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1981 _mov(T_ah, Zero, RegX8632::Reg_ah); 1982 _mov(T_ah, Zero, Traits::RegisterSet::Reg_ah);
1982 _div(T_ah, Src1, T); 1983 _div(T_ah, Src1, T);
1983 _mov(Dest, T_ah); 1984 _mov(Dest, T_ah);
1984 } else { 1985 } else {
1985 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1986 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1986 _mov(T_edx, Zero, RegX8632::Reg_edx); 1987 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
1987 _mov(T, Src0, RegX8632::Reg_eax); 1988 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1988 _div(T_edx, Src1, T); 1989 _div(T_edx, Src1, T);
1989 _mov(Dest, T_edx); 1990 _mov(Dest, T_edx);
1990 } 1991 }
1991 break; 1992 break;
1992 case InstArithmetic::Srem: 1993 case InstArithmetic::Srem:
1993 // TODO(stichnot): Enable this after doing better performance 1994 // TODO(stichnot): Enable this after doing better performance
1994 // and cross testing. 1995 // and cross testing.
1995 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1996 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1996 // Optimize mod by constant power of 2, but not for Om1 or O0, 1997 // Optimize mod by constant power of 2, but not for Om1 or O0,
1997 // just to keep things simple there. 1998 // just to keep things simple there.
(...skipping 27 matching lines...) Expand all
2025 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 2026 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
2026 _sub(T, Src0); 2027 _sub(T, Src0);
2027 _neg(T); 2028 _neg(T);
2028 _mov(Dest, T); 2029 _mov(Dest, T);
2029 return; 2030 return;
2030 } 2031 }
2031 } 2032 }
2032 } 2033 }
2033 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 2034 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
2034 if (isByteSizedArithType(Dest->getType())) { 2035 if (isByteSizedArithType(Dest->getType())) {
2035 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); 2036 Variable *T_ah = makeReg(IceType_i8, Traits::RegisterSet::Reg_ah);
2036 _mov(T, Src0, RegX8632::Reg_eax); 2037 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
2037 _cbwdq(T, T); 2038 _cbwdq(T, T);
2038 Context.insert(InstFakeDef::create(Func, T_ah)); 2039 Context.insert(InstFakeDef::create(Func, T_ah));
2039 _idiv(T_ah, Src1, T); 2040 _idiv(T_ah, Src1, T);
2040 _mov(Dest, T_ah); 2041 _mov(Dest, T_ah);
2041 } else { 2042 } else {
2042 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 2043 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
2043 _mov(T, Src0, RegX8632::Reg_eax); 2044 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
2044 _cbwdq(T_edx, T); 2045 _cbwdq(T_edx, T);
2045 _idiv(T_edx, Src1, T); 2046 _idiv(T_edx, Src1, T);
2046 _mov(Dest, T_edx); 2047 _mov(Dest, T_edx);
2047 } 2048 }
2048 break; 2049 break;
2049 case InstArithmetic::Fadd: 2050 case InstArithmetic::Fadd:
2050 _mov(T, Src0); 2051 _mov(T, Src0);
2051 _addss(T, Src1); 2052 _addss(T, Src1);
2052 _mov(Dest, T); 2053 _mov(Dest, T);
2053 break; 2054 break;
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
2149 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), 2150 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
2150 Inst->getTargetFalse()); 2151 Inst->getTargetFalse());
2151 return; 2152 return;
2152 } 2153 }
2153 } 2154 }
2154 } 2155 }
2155 2156
2156 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2157 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2157 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2158 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2158 _cmp(Src0, Zero); 2159 _cmp(Src0, Zero);
2159 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 2160 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
2160 } 2161 }
2161 2162
2162 template <class Machine> 2163 template <class Machine>
2163 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { 2164 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
2164 // x86-32 calling convention: 2165 // x86-32 calling convention:
2165 // 2166 //
2166 // * At the point before the call, the stack must be aligned to 16 2167 // * At the point before the call, the stack must be aligned to 16
2167 // bytes. 2168 // bytes.
2168 // 2169 //
2169 // * The first four arguments of vector type, regardless of their 2170 // * The first four arguments of vector type, regardless of their
(...skipping 26 matching lines...) Expand all
2196 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 2197 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2197 assert(typeWidthInBytes(Ty) >= 4); 2198 assert(typeWidthInBytes(Ty) >= 4);
2198 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { 2199 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
2199 XmmArgs.push_back(Arg); 2200 XmmArgs.push_back(Arg);
2200 } else { 2201 } else {
2201 StackArgs.push_back(Arg); 2202 StackArgs.push_back(Arg);
2202 if (isVectorType(Arg->getType())) { 2203 if (isVectorType(Arg->getType())) {
2203 ParameterAreaSizeBytes = 2204 ParameterAreaSizeBytes =
2204 Traits::applyStackAlignment(ParameterAreaSizeBytes); 2205 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2205 } 2206 }
2206 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 2207 Variable *esp =
2208 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2207 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2208 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
2209 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2210 } 2212 }
2211 } 2213 }
2212 2214
2213 // Adjust the parameter area so that the stack is aligned. It is 2215 // Adjust the parameter area so that the stack is aligned. It is
2214 // assumed that the stack is already aligned at the start of the 2216 // assumed that the stack is already aligned at the start of the
2215 // calling sequence. 2217 // calling sequence.
2216 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
(...skipping 17 matching lines...) Expand all
2234 // Copy arguments to be passed in registers to the appropriate 2236 // Copy arguments to be passed in registers to the appropriate
2235 // registers. 2237 // registers.
2236 // TODO: Investigate the impact of lowering arguments passed in 2238 // TODO: Investigate the impact of lowering arguments passed in
2237 // registers after lowering stack arguments as opposed to the other 2239 // registers after lowering stack arguments as opposed to the other
2238 // way around. Lowering register arguments after stack arguments may 2240 // way around. Lowering register arguments after stack arguments may
2239 // reduce register pressure. On the other hand, lowering register 2241 // reduce register pressure. On the other hand, lowering register
2240 // arguments first (before stack arguments) may result in more compact 2242 // arguments first (before stack arguments) may result in more compact
2241 // code, as the memory operand displacements may end up being smaller 2243 // code, as the memory operand displacements may end up being smaller
2242 // before any stack adjustment is done. 2244 // before any stack adjustment is done.
2243 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { 2245 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
2244 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i); 2246 Variable *Reg =
2247 legalizeToVar(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
2245 // Generate a FakeUse of register arguments so that they do not get 2248 // Generate a FakeUse of register arguments so that they do not get
2246 // dead code eliminated as a result of the FakeKill of scratch 2249 // dead code eliminated as a result of the FakeKill of scratch
2247 // registers after the call. 2250 // registers after the call.
2248 Context.insert(InstFakeUse::create(Func, Reg)); 2251 Context.insert(InstFakeUse::create(Func, Reg));
2249 } 2252 }
2250 // Generate the call instruction. Assign its result to a temporary 2253 // Generate the call instruction. Assign its result to a temporary
2251 // with high register allocation weight. 2254 // with high register allocation weight.
2252 Variable *Dest = Instr->getDest(); 2255 Variable *Dest = Instr->getDest();
2253 // ReturnReg doubles as ReturnRegLo as necessary. 2256 // ReturnReg doubles as ReturnRegLo as necessary.
2254 Variable *ReturnReg = nullptr; 2257 Variable *ReturnReg = nullptr;
2255 Variable *ReturnRegHi = nullptr; 2258 Variable *ReturnRegHi = nullptr;
2256 if (Dest) { 2259 if (Dest) {
2257 switch (Dest->getType()) { 2260 switch (Dest->getType()) {
2258 case IceType_NUM: 2261 case IceType_NUM:
2259 llvm_unreachable("Invalid Call dest type"); 2262 llvm_unreachable("Invalid Call dest type");
2260 break; 2263 break;
2261 case IceType_void: 2264 case IceType_void:
2262 break; 2265 break;
2263 case IceType_i1: 2266 case IceType_i1:
2264 case IceType_i8: 2267 case IceType_i8:
2265 case IceType_i16: 2268 case IceType_i16:
2266 case IceType_i32: 2269 case IceType_i32:
2267 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax); 2270 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
2268 break; 2271 break;
2269 case IceType_i64: 2272 case IceType_i64:
2270 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax); 2273 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
2271 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx); 2274 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
2272 break; 2275 break;
2273 case IceType_f32: 2276 case IceType_f32:
2274 case IceType_f64: 2277 case IceType_f64:
2275 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with 2278 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
2276 // the fstp instruction. 2279 // the fstp instruction.
2277 break; 2280 break;
2278 case IceType_v4i1: 2281 case IceType_v4i1:
2279 case IceType_v8i1: 2282 case IceType_v8i1:
2280 case IceType_v16i1: 2283 case IceType_v16i1:
2281 case IceType_v16i8: 2284 case IceType_v16i8:
2282 case IceType_v8i16: 2285 case IceType_v8i16:
2283 case IceType_v4i32: 2286 case IceType_v4i32:
2284 case IceType_v4f32: 2287 case IceType_v4f32:
2285 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0); 2288 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
2286 break; 2289 break;
2287 } 2290 }
2288 } 2291 }
2289 Operand *CallTarget = legalize(Instr->getCallTarget()); 2292 Operand *CallTarget = legalize(Instr->getCallTarget());
2290 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 2293 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
2291 if (NeedSandboxing) { 2294 if (NeedSandboxing) {
2292 if (llvm::isa<Constant>(CallTarget)) { 2295 if (llvm::isa<Constant>(CallTarget)) {
2293 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2296 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2294 } else { 2297 } else {
2295 Variable *CallTargetVar = nullptr; 2298 Variable *CallTargetVar = nullptr;
2296 _mov(CallTargetVar, CallTarget); 2299 _mov(CallTargetVar, CallTarget);
2297 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2300 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2298 const SizeT BundleSize = 2301 const SizeT BundleSize =
2299 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); 2302 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
2300 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); 2303 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2301 CallTarget = CallTargetVar; 2304 CallTarget = CallTargetVar;
2302 } 2305 }
2303 } 2306 }
2304 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 2307 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
2305 Context.insert(NewCall); 2308 Context.insert(NewCall);
2306 if (NeedSandboxing) 2309 if (NeedSandboxing)
2307 _bundle_unlock(); 2310 _bundle_unlock();
2308 if (ReturnRegHi) 2311 if (ReturnRegHi)
2309 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 2312 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
2310 2313
2311 // Add the appropriate offset to esp. The call instruction takes care 2314 // Add the appropriate offset to esp. The call instruction takes care
2312 // of resetting the stack offset during emission. 2315 // of resetting the stack offset during emission.
2313 if (ParameterAreaSizeBytes) { 2316 if (ParameterAreaSizeBytes) {
2314 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 2317 Variable *esp =
2318 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2315 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); 2319 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
2316 } 2320 }
2317 2321
2318 // Insert a register-kill pseudo instruction. 2322 // Insert a register-kill pseudo instruction.
2319 Context.insert(InstFakeKill::create(Func, NewCall)); 2323 Context.insert(InstFakeKill::create(Func, NewCall));
2320 2324
2321 // Generate a FakeUse to keep the call live if necessary. 2325 // Generate a FakeUse to keep the call live if necessary.
2322 if (Instr->hasSideEffects() && ReturnReg) { 2326 if (Instr->hasSideEffects() && ReturnReg) {
2323 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 2327 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
2324 Context.insert(FakeUse); 2328 Context.insert(FakeUse);
(...skipping 518 matching lines...) Expand 10 before | Expand all | Expand 10 after
2843 assert(ElementIndex); 2847 assert(ElementIndex);
2844 2848
2845 unsigned Index = ElementIndex->getValue(); 2849 unsigned Index = ElementIndex->getValue();
2846 Type Ty = SourceVectNotLegalized->getType(); 2850 Type Ty = SourceVectNotLegalized->getType();
2847 Type ElementTy = typeElementType(Ty); 2851 Type ElementTy = typeElementType(Ty);
2848 Type InVectorElementTy = Traits::getInVectorElementType(Ty); 2852 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2849 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2853 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2850 2854
2851 // TODO(wala): Determine the best lowering sequences for each type. 2855 // TODO(wala): Determine the best lowering sequences for each type.
2852 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2856 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2853 InstructionSet >= Machine::SSE4_1; 2857 InstructionSet >= Traits::SSE4_1;
2854 if (CanUsePextr && Ty != IceType_v4f32) { 2858 if (CanUsePextr && Ty != IceType_v4f32) {
2855 // Use pextrb, pextrw, or pextrd. 2859 // Use pextrb, pextrw, or pextrd.
2856 Constant *Mask = Ctx->getConstantInt32(Index); 2860 Constant *Mask = Ctx->getConstantInt32(Index);
2857 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2861 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2858 _pextr(ExtractedElementR, SourceVectR, Mask); 2862 _pextr(ExtractedElementR, SourceVectR, Mask);
2859 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2863 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2860 // Use pshufd and movd/movss. 2864 // Use pshufd and movd/movss.
2861 Variable *T = nullptr; 2865 Variable *T = nullptr;
2862 if (Index) { 2866 if (Index) {
2863 // The shuffle only needs to occur if the element to be extracted 2867 // The shuffle only needs to occur if the element to be extracted
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
2936 } else if (Condition == InstFcmp::False) { 2940 } else if (Condition == InstFcmp::False) {
2937 T = makeVectorOfZeros(Dest->getType()); 2941 T = makeVectorOfZeros(Dest->getType());
2938 } else { 2942 } else {
2939 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2943 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2940 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2944 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2941 if (llvm::isa<OperandX8632Mem>(Src1RM)) 2945 if (llvm::isa<OperandX8632Mem>(Src1RM))
2942 Src1RM = legalizeToVar(Src1RM); 2946 Src1RM = legalizeToVar(Src1RM);
2943 2947
2944 switch (Condition) { 2948 switch (Condition) {
2945 default: { 2949 default: {
2946 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate; 2950 typename Traits::Cond::CmppsCond Predicate =
2947 assert(Predicate != CondX86::Cmpps_Invalid); 2951 Traits::TableFcmp[Index].Predicate;
2952 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2948 T = makeReg(Src0RM->getType()); 2953 T = makeReg(Src0RM->getType());
2949 _movp(T, Src0RM); 2954 _movp(T, Src0RM);
2950 _cmpps(T, Src1RM, Predicate); 2955 _cmpps(T, Src1RM, Predicate);
2951 } break; 2956 } break;
2952 case InstFcmp::One: { 2957 case InstFcmp::One: {
2953 // Check both unequal and ordered. 2958 // Check both unequal and ordered.
2954 T = makeReg(Src0RM->getType()); 2959 T = makeReg(Src0RM->getType());
2955 Variable *T2 = makeReg(Src0RM->getType()); 2960 Variable *T2 = makeReg(Src0RM->getType());
2956 _movp(T, Src0RM); 2961 _movp(T, Src0RM);
2957 _cmpps(T, Src1RM, CondX86::Cmpps_neq); 2962 _cmpps(T, Src1RM, Traits::Cond::Cmpps_neq);
2958 _movp(T2, Src0RM); 2963 _movp(T2, Src0RM);
2959 _cmpps(T2, Src1RM, CondX86::Cmpps_ord); 2964 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_ord);
2960 _pand(T, T2); 2965 _pand(T, T2);
2961 } break; 2966 } break;
2962 case InstFcmp::Ueq: { 2967 case InstFcmp::Ueq: {
2963 // Check both equal or unordered. 2968 // Check both equal or unordered.
2964 T = makeReg(Src0RM->getType()); 2969 T = makeReg(Src0RM->getType());
2965 Variable *T2 = makeReg(Src0RM->getType()); 2970 Variable *T2 = makeReg(Src0RM->getType());
2966 _movp(T, Src0RM); 2971 _movp(T, Src0RM);
2967 _cmpps(T, Src1RM, CondX86::Cmpps_eq); 2972 _cmpps(T, Src1RM, Traits::Cond::Cmpps_eq);
2968 _movp(T2, Src0RM); 2973 _movp(T2, Src0RM);
2969 _cmpps(T2, Src1RM, CondX86::Cmpps_unord); 2974 _cmpps(T2, Src1RM, Traits::Cond::Cmpps_unord);
2970 _por(T, T2); 2975 _por(T, T2);
2971 } break; 2976 } break;
2972 } 2977 }
2973 } 2978 }
2974 2979
2975 _movp(Dest, T); 2980 _movp(Dest, T);
2976 eliminateNextVectorSextInstruction(Dest); 2981 eliminateNextVectorSextInstruction(Dest);
2977 return; 2982 return;
2978 } 2983 }
2979 2984
2980 // Lowering a = fcmp cond, b, c 2985 // Lowering a = fcmp cond, b, c
2981 // ucomiss b, c /* only if C1 != Br_None */ 2986 // ucomiss b, c /* only if C1 != Br_None */
2982 // /* but swap b,c order if SwapOperands==true */ 2987 // /* but swap b,c order if SwapOperands==true */
2983 // mov a, <default> 2988 // mov a, <default>
2984 // j<C1> label /* only if C1 != Br_None */ 2989 // j<C1> label /* only if C1 != Br_None */
2985 // j<C2> label /* only if C2 != Br_None */ 2990 // j<C2> label /* only if C2 != Br_None */
2986 // FakeUse(a) /* only if C1 != Br_None */ 2991 // FakeUse(a) /* only if C1 != Br_None */
2987 // mov a, !<default> /* only if C1 != Br_None */ 2992 // mov a, !<default> /* only if C1 != Br_None */
2988 // label: /* only if C1 != Br_None */ 2993 // label: /* only if C1 != Br_None */
2989 // 2994 //
2990 // setcc lowering when C1 != Br_None && C2 == Br_None: 2995 // setcc lowering when C1 != Br_None && C2 == Br_None:
2991 // ucomiss b, c /* but swap b,c order if SwapOperands==true */ 2996 // ucomiss b, c /* but swap b,c order if SwapOperands==true */
2992 // setcc a, C1 2997 // setcc a, C1
2993 InstFcmp::FCond Condition = Inst->getCondition(); 2998 InstFcmp::FCond Condition = Inst->getCondition();
2994 size_t Index = static_cast<size_t>(Condition); 2999 size_t Index = static_cast<size_t>(Condition);
2995 assert(Index < Traits::TableFcmpSize); 3000 assert(Index < Traits::TableFcmpSize);
2996 if (Traits::TableFcmp[Index].SwapScalarOperands) 3001 if (Traits::TableFcmp[Index].SwapScalarOperands)
2997 std::swap(Src0, Src1); 3002 std::swap(Src0, Src1);
2998 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None); 3003 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2999 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None); 3004 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
3000 if (HasC1) { 3005 if (HasC1) {
3001 Src0 = legalize(Src0); 3006 Src0 = legalize(Src0);
3002 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 3007 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
3003 Variable *T = nullptr; 3008 Variable *T = nullptr;
3004 _mov(T, Src0); 3009 _mov(T, Src0);
3005 _ucomiss(T, Src1RM); 3010 _ucomiss(T, Src1RM);
3006 if (!HasC2) { 3011 if (!HasC2) {
3007 assert(Traits::TableFcmp[Index].Default); 3012 assert(Traits::TableFcmp[Index].Default);
3008 _setcc(Dest, Traits::TableFcmp[Index].C1); 3013 _setcc(Dest, Traits::TableFcmp[Index].C1);
3009 return; 3014 return;
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
3147 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 3152 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3148 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 3153 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3149 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 3154 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3150 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 3155 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3151 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3156 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3152 Constant *One = Ctx->getConstantInt32(1); 3157 Constant *One = Ctx->getConstantInt32(1);
3153 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 3158 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
3154 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 3159 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
3155 _mov(Dest, One); 3160 _mov(Dest, One);
3156 _cmp(Src0HiRM, Src1HiRI); 3161 _cmp(Src0HiRM, Src1HiRI);
3157 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None) 3162 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
3158 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 3163 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
3159 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None) 3164 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
3160 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 3165 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3161 _cmp(Src0LoRM, Src1LoRI); 3166 _cmp(Src0LoRM, Src1LoRI);
3162 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 3167 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3163 Context.insert(LabelFalse); 3168 Context.insert(LabelFalse);
3164 _mov_nonkillable(Dest, Zero); 3169 _mov_nonkillable(Dest, Zero);
3165 Context.insert(LabelTrue); 3170 Context.insert(LabelTrue);
3166 return; 3171 return;
3167 } 3172 }
3168 3173
3169 // cmp b, c 3174 // cmp b, c
(...skipping 21 matching lines...) Expand all
3191 // Expand the element to the appropriate size for it to be inserted 3196 // Expand the element to the appropriate size for it to be inserted
3192 // in the vector. 3197 // in the vector.
3193 Variable *Expanded = Func->template makeVariable(InVectorElementTy); 3198 Variable *Expanded = Func->template makeVariable(InVectorElementTy);
3194 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 3199 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
3195 ElementToInsertNotLegalized); 3200 ElementToInsertNotLegalized);
3196 lowerCast(Cast); 3201 lowerCast(Cast);
3197 ElementToInsertNotLegalized = Expanded; 3202 ElementToInsertNotLegalized = Expanded;
3198 } 3203 }
3199 3204
3200 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || 3205 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
3201 InstructionSet >= Machine::SSE4_1) { 3206 InstructionSet >= Traits::SSE4_1) {
3202 // Use insertps, pinsrb, pinsrw, or pinsrd. 3207 // Use insertps, pinsrb, pinsrw, or pinsrd.
3203 Operand *ElementRM = 3208 Operand *ElementRM =
3204 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3209 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3205 Operand *SourceVectRM = 3210 Operand *SourceVectRM =
3206 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3211 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3207 Variable *T = makeReg(Ty); 3212 Variable *T = makeReg(Ty);
3208 _movp(T, SourceVectRM); 3213 _movp(T, SourceVectRM);
3209 if (Ty == IceType_v4f32) 3214 if (Ty == IceType_v4f32)
3210 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3215 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3211 else 3216 else
(...skipping 393 matching lines...) Expand 10 before | Expand all | Expand 10 after
3605 } 3610 }
3606 case Intrinsics::Sqrt: { 3611 case Intrinsics::Sqrt: {
3607 Operand *Src = legalize(Instr->getArg(0)); 3612 Operand *Src = legalize(Instr->getArg(0));
3608 Variable *Dest = Instr->getDest(); 3613 Variable *Dest = Instr->getDest();
3609 Variable *T = makeReg(Dest->getType()); 3614 Variable *T = makeReg(Dest->getType());
3610 _sqrtss(T, Src); 3615 _sqrtss(T, Src);
3611 _mov(Dest, T); 3616 _mov(Dest, T);
3612 return; 3617 return;
3613 } 3618 }
3614 case Intrinsics::Stacksave: { 3619 case Intrinsics::Stacksave: {
3615 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 3620 Variable *esp =
3621 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3616 Variable *Dest = Instr->getDest(); 3622 Variable *Dest = Instr->getDest();
3617 _mov(Dest, esp); 3623 _mov(Dest, esp);
3618 return; 3624 return;
3619 } 3625 }
3620 case Intrinsics::Stackrestore: { 3626 case Intrinsics::Stackrestore: {
3621 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 3627 Variable *esp =
3628 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3622 _mov_nonkillable(esp, Instr->getArg(0)); 3629 _mov_nonkillable(esp, Instr->getArg(0));
3623 return; 3630 return;
3624 } 3631 }
3625 case Intrinsics::Trap: 3632 case Intrinsics::Trap:
3626 _ud2(); 3633 _ud2();
3627 return; 3634 return;
3628 case Intrinsics::UnknownIntrinsic: 3635 case Intrinsics::UnknownIntrinsic:
3629 Func->setError("Should not be lowering UnknownIntrinsic"); 3636 Func->setError("Should not be lowering UnknownIntrinsic");
3630 return; 3637 return;
3631 } 3638 }
3632 return; 3639 return;
3633 } 3640 }
3634 3641
3635 template <class Machine> 3642 template <class Machine>
3636 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3643 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3637 Operand *Ptr, Operand *Expected, 3644 Operand *Ptr, Operand *Expected,
3638 Operand *Desired) { 3645 Operand *Desired) {
3639 if (Expected->getType() == IceType_i64) { 3646 if (Expected->getType() == IceType_i64) {
3640 // Reserve the pre-colored registers first, before adding any more 3647 // Reserve the pre-colored registers first, before adding any more
3641 // infinite-weight variables from formMemoryOperand's legalization. 3648 // infinite-weight variables from formMemoryOperand's legalization.
3642 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 3649 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3643 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); 3650 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3644 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 3651 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3645 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); 3652 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3646 _mov(T_eax, loOperand(Expected)); 3653 _mov(T_eax, loOperand(Expected));
3647 _mov(T_edx, hiOperand(Expected)); 3654 _mov(T_edx, hiOperand(Expected));
3648 _mov(T_ebx, loOperand(Desired)); 3655 _mov(T_ebx, loOperand(Desired));
3649 _mov(T_ecx, hiOperand(Desired)); 3656 _mov(T_ecx, hiOperand(Desired));
3650 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3657 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3651 const bool Locked = true; 3658 const bool Locked = true;
3652 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3659 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3653 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3660 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3654 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3661 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3655 _mov(DestLo, T_eax); 3662 _mov(DestLo, T_eax);
3656 _mov(DestHi, T_edx); 3663 _mov(DestHi, T_edx);
3657 return; 3664 return;
3658 } 3665 }
3659 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); 3666 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
3660 _mov(T_eax, Expected); 3667 _mov(T_eax, Expected);
3661 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3668 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
3662 Variable *DesiredReg = legalizeToVar(Desired); 3669 Variable *DesiredReg = legalizeToVar(Desired);
3663 const bool Locked = true; 3670 const bool Locked = true;
3664 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3671 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3665 _mov(DestPrev, T_eax); 3672 _mov(DestPrev, T_eax);
3666 } 3673 }
3667 3674
3668 template <class Machine> 3675 template <class Machine>
3669 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3676 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
3720 NextBr->isLastUse(NextCmp->getDest())) { 3727 NextBr->isLastUse(NextCmp->getDest())) {
3721 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); 3728 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3722 for (size_t i = 0; i < PhiAssigns.size(); ++i) { 3729 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3723 // Lower the phi assignments now, before the branch (same placement 3730 // Lower the phi assignments now, before the branch (same placement
3724 // as before). 3731 // as before).
3725 InstAssign *PhiAssign = PhiAssigns[i]; 3732 InstAssign *PhiAssign = PhiAssigns[i];
3726 PhiAssign->setDeleted(); 3733 PhiAssign->setDeleted();
3727 lowerAssign(PhiAssign); 3734 lowerAssign(PhiAssign);
3728 Context.advanceNext(); 3735 Context.advanceNext();
3729 } 3736 }
3730 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); 3737 _br(Traits::Cond::Br_e, NextBr->getTargetTrue(),
3738 NextBr->getTargetFalse());
3731 // Skip over the old compare and branch, by deleting them. 3739 // Skip over the old compare and branch, by deleting them.
3732 NextCmp->setDeleted(); 3740 NextCmp->setDeleted();
3733 NextBr->setDeleted(); 3741 NextBr->setDeleted();
3734 Context.advanceNext(); 3742 Context.advanceNext();
3735 Context.advanceNext(); 3743 Context.advanceNext();
3736 return true; 3744 return true;
3737 } 3745 }
3738 } 3746 }
3739 } 3747 }
3740 return false; 3748 return false;
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
3851 // mov <reg>, eax 3859 // mov <reg>, eax
3852 // op <reg>, [desired_adj] 3860 // op <reg>, [desired_adj]
3853 // lock cmpxchg [ptr], <reg> 3861 // lock cmpxchg [ptr], <reg>
3854 // jne .LABEL 3862 // jne .LABEL
3855 // mov <dest>, eax 3863 // mov <dest>, eax
3856 // 3864 //
3857 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3865 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3858 Val = legalize(Val); 3866 Val = legalize(Val);
3859 Type Ty = Val->getType(); 3867 Type Ty = Val->getType();
3860 if (Ty == IceType_i64) { 3868 if (Ty == IceType_i64) {
3861 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 3869 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3862 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); 3870 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3863 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3871 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
3864 _mov(T_eax, loOperand(Addr)); 3872 _mov(T_eax, loOperand(Addr));
3865 _mov(T_edx, hiOperand(Addr)); 3873 _mov(T_edx, hiOperand(Addr));
3866 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 3874 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3867 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); 3875 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3868 InstX8632Label *Label = InstX8632Label::create(Func, this); 3876 InstX8632Label *Label = InstX8632Label::create(Func, this);
3869 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3877 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3870 if (!IsXchg8b) { 3878 if (!IsXchg8b) {
3871 Context.insert(Label); 3879 Context.insert(Label);
3872 _mov(T_ebx, T_eax); 3880 _mov(T_ebx, T_eax);
3873 (this->*Op_Lo)(T_ebx, loOperand(Val)); 3881 (this->*Op_Lo)(T_ebx, loOperand(Val));
3874 _mov(T_ecx, T_edx); 3882 _mov(T_ecx, T_edx);
3875 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3883 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3876 } else { 3884 } else {
3877 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3885 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3878 // It just needs the Val loaded into ebx and ecx. 3886 // It just needs the Val loaded into ebx and ecx.
3879 // That can also be done before the loop. 3887 // That can also be done before the loop.
3880 _mov(T_ebx, loOperand(Val)); 3888 _mov(T_ebx, loOperand(Val));
3881 _mov(T_ecx, hiOperand(Val)); 3889 _mov(T_ecx, hiOperand(Val));
3882 Context.insert(Label); 3890 Context.insert(Label);
3883 } 3891 }
3884 const bool Locked = true; 3892 const bool Locked = true;
3885 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3893 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3886 _br(CondX86::Br_ne, Label); 3894 _br(Traits::Cond::Br_ne, Label);
3887 if (!IsXchg8b) { 3895 if (!IsXchg8b) {
3888 // If Val is a variable, model the extended live range of Val through 3896 // If Val is a variable, model the extended live range of Val through
3889 // the end of the loop, since it will be re-used by the loop. 3897 // the end of the loop, since it will be re-used by the loop.
3890 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3898 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3891 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); 3899 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3892 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); 3900 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3893 Context.insert(InstFakeUse::create(Func, ValLo)); 3901 Context.insert(InstFakeUse::create(Func, ValLo));
3894 Context.insert(InstFakeUse::create(Func, ValHi)); 3902 Context.insert(InstFakeUse::create(Func, ValHi));
3895 } 3903 }
3896 } else { 3904 } else {
3897 // For xchg, the loop is slightly smaller and ebx/ecx are used. 3905 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3898 Context.insert(InstFakeUse::create(Func, T_ebx)); 3906 Context.insert(InstFakeUse::create(Func, T_ebx));
3899 Context.insert(InstFakeUse::create(Func, T_ecx)); 3907 Context.insert(InstFakeUse::create(Func, T_ecx));
3900 } 3908 }
3901 // The address base (if any) is also reused in the loop. 3909 // The address base (if any) is also reused in the loop.
3902 if (Variable *Base = Addr->getBase()) 3910 if (Variable *Base = Addr->getBase())
3903 Context.insert(InstFakeUse::create(Func, Base)); 3911 Context.insert(InstFakeUse::create(Func, Base));
3904 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3912 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3905 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3913 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3906 _mov(DestLo, T_eax); 3914 _mov(DestLo, T_eax);
3907 _mov(DestHi, T_edx); 3915 _mov(DestHi, T_edx);
3908 return; 3916 return;
3909 } 3917 }
3910 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
3911 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax); 3919 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
3912 _mov(T_eax, Addr); 3920 _mov(T_eax, Addr);
3913 InstX8632Label *Label = InstX8632Label::create(Func, this); 3921 InstX8632Label *Label = InstX8632Label::create(Func, this);
3914 Context.insert(Label); 3922 Context.insert(Label);
3915 // We want to pick a different register for T than Eax, so don't use 3923 // We want to pick a different register for T than Eax, so don't use
3916 // _mov(T == nullptr, T_eax). 3924 // _mov(T == nullptr, T_eax).
3917 Variable *T = makeReg(Ty); 3925 Variable *T = makeReg(Ty);
3918 _mov(T, T_eax); 3926 _mov(T, T_eax);
3919 (this->*Op_Lo)(T, Val); 3927 (this->*Op_Lo)(T, Val);
3920 const bool Locked = true; 3928 const bool Locked = true;
3921 _cmpxchg(Addr, T_eax, T, Locked); 3929 _cmpxchg(Addr, T_eax, T, Locked);
3922 _br(CondX86::Br_ne, Label); 3930 _br(Traits::Cond::Br_ne, Label);
3923 // If Val is a variable, model the extended live range of Val through 3931 // If Val is a variable, model the extended live range of Val through
3924 // the end of the loop, since it will be re-used by the loop. 3932 // the end of the loop, since it will be re-used by the loop.
3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3933 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3926 Context.insert(InstFakeUse::create(Func, ValVar)); 3934 Context.insert(InstFakeUse::create(Func, ValVar));
3927 } 3935 }
3928 // The address base (if any) is also reused in the loop. 3936 // The address base (if any) is also reused in the loop.
3929 if (Variable *Base = Addr->getBase()) 3937 if (Variable *Base = Addr->getBase())
3930 Context.insert(InstFakeUse::create(Func, Base)); 3938 Context.insert(InstFakeUse::create(Func, Base));
3931 _mov(Dest, T_eax); 3939 _mov(Dest, T_eax);
3932 } 3940 }
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
3976 } 3984 }
3977 Variable *T_Dest = makeReg(IceType_i32); 3985 Variable *T_Dest = makeReg(IceType_i32);
3978 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 3986 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3979 Constant *ThirtyOne = Ctx->getConstantInt32(31); 3987 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3980 if (Cttz) { 3988 if (Cttz) {
3981 _mov(T_Dest, ThirtyTwo); 3989 _mov(T_Dest, ThirtyTwo);
3982 } else { 3990 } else {
3983 Constant *SixtyThree = Ctx->getConstantInt32(63); 3991 Constant *SixtyThree = Ctx->getConstantInt32(63);
3984 _mov(T_Dest, SixtyThree); 3992 _mov(T_Dest, SixtyThree);
3985 } 3993 }
3986 _cmov(T_Dest, T, CondX86::Br_ne); 3994 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3987 if (!Cttz) { 3995 if (!Cttz) {
3988 _xor(T_Dest, ThirtyOne); 3996 _xor(T_Dest, ThirtyOne);
3989 } 3997 }
3990 if (Ty == IceType_i32) { 3998 if (Ty == IceType_i32) {
3991 _mov(Dest, T_Dest); 3999 _mov(Dest, T_Dest);
3992 return; 4000 return;
3993 } 4001 }
3994 _add(T_Dest, ThirtyTwo); 4002 _add(T_Dest, ThirtyTwo);
3995 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4003 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3996 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 4004 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3997 // Will be using "test" on this, so we need a registerized variable. 4005 // Will be using "test" on this, so we need a registerized variable.
3998 Variable *SecondVar = legalizeToVar(SecondVal); 4006 Variable *SecondVar = legalizeToVar(SecondVal);
3999 Variable *T_Dest2 = makeReg(IceType_i32); 4007 Variable *T_Dest2 = makeReg(IceType_i32);
4000 if (Cttz) { 4008 if (Cttz) {
4001 _bsf(T_Dest2, SecondVar); 4009 _bsf(T_Dest2, SecondVar);
4002 } else { 4010 } else {
4003 _bsr(T_Dest2, SecondVar); 4011 _bsr(T_Dest2, SecondVar);
4004 _xor(T_Dest2, ThirtyOne); 4012 _xor(T_Dest2, ThirtyOne);
4005 } 4013 }
4006 _test(SecondVar, SecondVar); 4014 _test(SecondVar, SecondVar);
4007 _cmov(T_Dest2, T_Dest, CondX86::Br_e); 4015 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
4008 _mov(DestLo, T_Dest2); 4016 _mov(DestLo, T_Dest2);
4009 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 4017 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
4010 } 4018 }
4011 4019
4012 bool isAdd(const Inst *Inst) { 4020 bool isAdd(const Inst *Inst) {
4013 if (const InstArithmetic *Arith = 4021 if (const InstArithmetic *Arith =
4014 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 4022 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
4015 return (Arith->getOp() == InstArithmetic::Add); 4023 return (Arith->getOp() == InstArithmetic::Add);
4016 } 4024 }
4017 return false; 4025 return false;
(...skipping 281 matching lines...) Expand 10 before | Expand all | Expand 10 after
4299 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { 4307 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
4300 Func->setError("Phi found in regular instruction list"); 4308 Func->setError("Phi found in regular instruction list");
4301 } 4309 }
4302 4310
4303 template <class Machine> 4311 template <class Machine>
4304 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) { 4312 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
4305 Variable *Reg = nullptr; 4313 Variable *Reg = nullptr;
4306 if (Inst->hasRetValue()) { 4314 if (Inst->hasRetValue()) {
4307 Operand *Src0 = legalize(Inst->getRetValue()); 4315 Operand *Src0 = legalize(Inst->getRetValue());
4308 if (Src0->getType() == IceType_i64) { 4316 if (Src0->getType() == IceType_i64) {
4309 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); 4317 Variable *eax =
4310 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); 4318 legalizeToVar(loOperand(Src0), Traits::RegisterSet::Reg_eax);
4319 Variable *edx =
4320 legalizeToVar(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
4311 Reg = eax; 4321 Reg = eax;
4312 Context.insert(InstFakeUse::create(Func, edx)); 4322 Context.insert(InstFakeUse::create(Func, edx));
4313 } else if (isScalarFloatingType(Src0->getType())) { 4323 } else if (isScalarFloatingType(Src0->getType())) {
4314 _fld(Src0); 4324 _fld(Src0);
4315 } else if (isVectorType(Src0->getType())) { 4325 } else if (isVectorType(Src0->getType())) {
4316 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); 4326 Reg = legalizeToVar(Src0, Traits::RegisterSet::Reg_xmm0);
4317 } else { 4327 } else {
4318 _mov(Reg, Src0, RegX8632::Reg_eax); 4328 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
4319 } 4329 }
4320 } 4330 }
4321 // Add a ret instruction even if sandboxing is enabled, because 4331 // Add a ret instruction even if sandboxing is enabled, because
4322 // addEpilog explicitly looks for a ret instruction as a marker for 4332 // addEpilog explicitly looks for a ret instruction as a marker for
4323 // where to insert the frame removal instructions. 4333 // where to insert the frame removal instructions.
4324 _ret(Reg); 4334 _ret(Reg);
4325 // Add a fake use of esp to make sure esp stays alive for the entire 4335 // Add a fake use of esp to make sure esp stays alive for the entire
4326 // function. Otherwise post-call esp adjustments get dead-code 4336 // function. Otherwise post-call esp adjustments get dead-code
4327 // eliminated. TODO: Are there more places where the fake use 4337 // eliminated. TODO: Are there more places where the fake use
4328 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 4338 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
4329 // have a ret instruction. 4339 // have a ret instruction.
4330 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 4340 Variable *esp =
4341 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
4331 Context.insert(InstFakeUse::create(Func, esp)); 4342 Context.insert(InstFakeUse::create(Func, esp));
4332 } 4343 }
4333 4344
4334 template <class Machine> 4345 template <class Machine>
4335 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { 4346 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
4336 Variable *Dest = Inst->getDest(); 4347 Variable *Dest = Inst->getDest();
4337 Type DestTy = Dest->getType(); 4348 Type DestTy = Dest->getType();
4338 Operand *SrcT = Inst->getTrueOperand(); 4349 Operand *SrcT = Inst->getTrueOperand();
4339 Operand *SrcF = Inst->getFalseOperand(); 4350 Operand *SrcF = Inst->getFalseOperand();
4340 Operand *Condition = Inst->getCondition(); 4351 Operand *Condition = Inst->getCondition();
4341 4352
4342 if (isVectorType(DestTy)) { 4353 if (isVectorType(DestTy)) {
4343 Type SrcTy = SrcT->getType(); 4354 Type SrcTy = SrcT->getType();
4344 Variable *T = makeReg(SrcTy); 4355 Variable *T = makeReg(SrcTy);
4345 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 4356 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4346 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 4357 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4347 if (InstructionSet >= Machine::SSE4_1) { 4358 if (InstructionSet >= Traits::SSE4_1) {
4348 // TODO(wala): If the condition operand is a constant, use blendps 4359 // TODO(wala): If the condition operand is a constant, use blendps
4349 // or pblendw. 4360 // or pblendw.
4350 // 4361 //
4351 // Use blendvps or pblendvb to implement select. 4362 // Use blendvps or pblendvb to implement select.
4352 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 4363 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4353 SrcTy == IceType_v4f32) { 4364 SrcTy == IceType_v4f32) {
4354 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4365 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4355 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 4366 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4356 _movp(xmm0, ConditionRM); 4367 _movp(xmm0, ConditionRM);
4357 _psll(xmm0, Ctx->getConstantInt8(31)); 4368 _psll(xmm0, Ctx->getConstantInt8(31));
4358 _movp(T, SrcFRM); 4369 _movp(T, SrcFRM);
4359 _blendvps(T, SrcTRM, xmm0); 4370 _blendvps(T, SrcTRM, xmm0);
4360 _movp(Dest, T); 4371 _movp(Dest, T);
4361 } else { 4372 } else {
4362 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 4373 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
4363 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 4374 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
4364 : IceType_v16i8; 4375 : IceType_v16i8;
4365 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 4376 Variable *xmm0 = makeReg(SignExtTy, Traits::RegisterSet::Reg_xmm0);
4366 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 4377 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
4367 _movp(T, SrcFRM); 4378 _movp(T, SrcFRM);
4368 _pblendvb(T, SrcTRM, xmm0); 4379 _pblendvb(T, SrcTRM, xmm0);
4369 _movp(Dest, T); 4380 _movp(Dest, T);
4370 } 4381 }
4371 return; 4382 return;
4372 } 4383 }
4373 // Lower select without Machine::SSE4.1: 4384 // Lower select without Traits::SSE4.1:
4374 // a=d?b:c ==> 4385 // a=d?b:c ==>
4375 // if elementtype(d) != i1: 4386 // if elementtype(d) != i1:
4376 // d=sext(d); 4387 // d=sext(d);
4377 // a=(b&d)|(c&~d); 4388 // a=(b&d)|(c&~d);
4378 Variable *T2 = makeReg(SrcTy); 4389 Variable *T2 = makeReg(SrcTy);
4379 // Sign extend the condition operand if applicable. 4390 // Sign extend the condition operand if applicable.
4380 if (SrcTy == IceType_v4f32) { 4391 if (SrcTy == IceType_v4f32) {
4381 // The sext operation takes only integer arguments. 4392 // The sext operation takes only integer arguments.
4382 Variable *T3 = Func->template makeVariable(IceType_v4i32); 4393 Variable *T3 = Func->template makeVariable(IceType_v4i32);
4383 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 4394 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
4384 _movp(T, T3); 4395 _movp(T, T3);
4385 } else if (typeElementType(SrcTy) != IceType_i1) { 4396 } else if (typeElementType(SrcTy) != IceType_i1) {
4386 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 4397 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
4387 } else { 4398 } else {
4388 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4399 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4389 _movp(T, ConditionRM); 4400 _movp(T, ConditionRM);
4390 } 4401 }
4391 _movp(T2, T); 4402 _movp(T2, T);
4392 _pand(T, SrcTRM); 4403 _pand(T, SrcTRM);
4393 _pandn(T2, SrcFRM); 4404 _pandn(T2, SrcFRM);
4394 _por(T, T2); 4405 _por(T, T2);
4395 _movp(Dest, T); 4406 _movp(Dest, T);
4396 4407
4397 return; 4408 return;
4398 } 4409 }
4399 4410
4400 CondX86::BrCond Cond = CondX86::Br_ne; 4411 typename Traits::Cond::BrCond Cond = Traits::Cond::Br_ne;
4401 Operand *CmpOpnd0 = nullptr; 4412 Operand *CmpOpnd0 = nullptr;
4402 Operand *CmpOpnd1 = nullptr; 4413 Operand *CmpOpnd1 = nullptr;
4403 // Handle folding opportunities. 4414 // Handle folding opportunities.
4404 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { 4415 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
4405 assert(Producer->isDeleted()); 4416 assert(Producer->isDeleted());
4406 switch (BoolFolding::getProducerKind(Producer)) { 4417 switch (BoolFolding::getProducerKind(Producer)) {
4407 default: 4418 default:
4408 break; 4419 break;
4409 case BoolFolding::PK_Icmp32: { 4420 case BoolFolding::PK_Icmp32: {
4410 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 4421 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
4535 Src0Hi = legalizeToVar(Src0Hi); 4546 Src0Hi = legalizeToVar(Src0Hi);
4536 } else { 4547 } else {
4537 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4548 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4538 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4549 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4539 } 4550 }
4540 for (SizeT I = 0; I < NumCases; ++I) { 4551 for (SizeT I = 0; I < NumCases; ++I) {
4541 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); 4552 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4542 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); 4553 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4543 InstX8632Label *Label = InstX8632Label::create(Func, this); 4554 InstX8632Label *Label = InstX8632Label::create(Func, this);
4544 _cmp(Src0Lo, ValueLo); 4555 _cmp(Src0Lo, ValueLo);
4545 _br(CondX86::Br_ne, Label); 4556 _br(Traits::Cond::Br_ne, Label);
4546 _cmp(Src0Hi, ValueHi); 4557 _cmp(Src0Hi, ValueHi);
4547 _br(CondX86::Br_e, Inst->getLabel(I)); 4558 _br(Traits::Cond::Br_e, Inst->getLabel(I));
4548 Context.insert(Label); 4559 Context.insert(Label);
4549 } 4560 }
4550 _br(Inst->getLabelDefault()); 4561 _br(Inst->getLabelDefault());
4551 return; 4562 return;
4552 } 4563 }
4553 // OK, we'll be slightly less naive by forcing Src into a physical 4564 // OK, we'll be slightly less naive by forcing Src into a physical
4554 // register if there are 2 or more uses. 4565 // register if there are 2 or more uses.
4555 if (NumCases >= 2) 4566 if (NumCases >= 2)
4556 Src0 = legalizeToVar(Src0); 4567 Src0 = legalizeToVar(Src0);
4557 else 4568 else
4558 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4569 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4559 for (SizeT I = 0; I < NumCases; ++I) { 4570 for (SizeT I = 0; I < NumCases; ++I) {
4560 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); 4571 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4561 _cmp(Src0, Value); 4572 _cmp(Src0, Value);
4562 _br(CondX86::Br_e, Inst->getLabel(I)); 4573 _br(Traits::Cond::Br_e, Inst->getLabel(I));
4563 } 4574 }
4564 4575
4565 _br(Inst->getLabelDefault()); 4576 _br(Inst->getLabelDefault());
4566 } 4577 }
4567 4578
4568 template <class Machine> 4579 template <class Machine>
4569 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4580 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4570 Variable *Dest, Operand *Src0, 4581 Variable *Dest, Operand *Src0,
4571 Operand *Src1) { 4582 Operand *Src1) {
4572 assert(isVectorType(Dest->getType())); 4583 assert(isVectorType(Dest->getType()));
(...skipping 628 matching lines...) Expand 10 before | Expand all | Expand 10 after
5201 return; 5212 return;
5202 inferTwoAddress(); 5213 inferTwoAddress();
5203 } 5214 }
5204 5215
5205 template <class Machine> 5216 template <class Machine>
5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation( 5217 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5207 llvm::SmallVectorImpl<int32_t> &Permutation, 5218 llvm::SmallVectorImpl<int32_t> &Permutation,
5208 const llvm::SmallBitVector &ExcludeRegisters) const { 5219 const llvm::SmallBitVector &ExcludeRegisters) const {
5209 // TODO(stichnot): Declaring Permutation this way loses type/size 5220 // TODO(stichnot): Declaring Permutation this way loses type/size
5210 // information. Fix this in conjunction with the caller-side TODO. 5221 // information. Fix this in conjunction with the caller-side TODO.
5211 assert(Permutation.size() >= RegX8632::Reg_NUM); 5222 assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM);
5212 // Expected upper bound on the number of registers in a single 5223 // Expected upper bound on the number of registers in a single
5213 // equivalence class. For x86-32, this would comprise the 8 XMM 5224 // equivalence class. For x86-32, this would comprise the 8 XMM
5214 // registers. This is for performance, not correctness. 5225 // registers. This is for performance, not correctness.
5215 static const unsigned MaxEquivalenceClassSize = 8; 5226 static const unsigned MaxEquivalenceClassSize = 8;
5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; 5227 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; 5228 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
5218 EquivalenceClassMap EquivalenceClasses; 5229 EquivalenceClassMap EquivalenceClasses;
5219 SizeT NumShuffled = 0, NumPreserved = 0; 5230 SizeT NumShuffled = 0, NumPreserved = 0;
5220 5231
5221 // Build up the equivalence classes of registers by looking at the 5232 // Build up the equivalence classes of registers by looking at the
5222 // register properties as well as whether the registers should be 5233 // register properties as well as whether the registers should be
5223 // explicitly excluded from shuffling. 5234 // explicitly excluded from shuffling.
5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 5235 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
5225 frameptr, isI8, isInt, isFP) \ 5236 frameptr, isI8, isInt, isFP) \
5226 if (ExcludeRegisters[RegX8632::val]) { \ 5237 if (ExcludeRegisters[Traits::RegisterSet::val]) { \
5227 /* val stays the same in the resulting permutation. */ \ 5238 /* val stays the same in the resulting permutation. */ \
5228 Permutation[RegX8632::val] = RegX8632::val; \ 5239 Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val; \
5229 ++NumPreserved; \ 5240 ++NumPreserved; \
5230 } else { \ 5241 } else { \
5231 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \ 5242 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \
5232 (isInt << 3) | (isFP << 4); \ 5243 (isInt << 3) | (isFP << 4); \
5233 /* val is assigned to an equivalence class based on its properties. */ \ 5244 /* val is assigned to an equivalence class based on its properties. */ \
5234 EquivalenceClasses[Index].push_back(RegX8632::val); \ 5245 EquivalenceClasses[Index].push_back(Traits::RegisterSet::val); \
5235 } 5246 }
5236 REGX8632_TABLE 5247 REGX8632_TABLE
5237 #undef X 5248 #undef X
5238 5249
5239 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 5250 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
5240 5251
5241 // Shuffle the resulting equivalence classes. 5252 // Shuffle the resulting equivalence classes.
5242 for (auto I : EquivalenceClasses) { 5253 for (auto I : EquivalenceClasses) {
5243 const RegisterList &List = I.second; 5254 const RegisterList &List = I.second;
5244 RegisterList Shuffled(List); 5255 RegisterList Shuffled(List);
5245 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG); 5256 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
5246 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) { 5257 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
5247 Permutation[List[SI]] = Shuffled[SI]; 5258 Permutation[List[SI]] = Shuffled[SI];
5248 ++NumShuffled; 5259 ++NumShuffled;
5249 } 5260 }
5250 } 5261 }
5251 5262
5252 assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM); 5263 assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM);
5253 5264
5254 if (Func->isVerbose(IceV_Random)) { 5265 if (Func->isVerbose(IceV_Random)) {
5255 OstreamLocker L(Func->getContext()); 5266 OstreamLocker L(Func->getContext());
5256 Ostream &Str = Func->getContext()->getStrDump(); 5267 Ostream &Str = Func->getContext()->getStrDump();
5257 Str << "Register equivalence classes:\n"; 5268 Str << "Register equivalence classes:\n";
5258 for (auto I : EquivalenceClasses) { 5269 for (auto I : EquivalenceClasses) {
5259 Str << "{"; 5270 Str << "{";
5260 const RegisterList &List = I.second; 5271 const RegisterList &List = I.second;
5261 bool First = true; 5272 bool First = true;
5262 for (int32_t Register : List) { 5273 for (int32_t Register : List) {
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
5496 } 5507 }
5497 // the offset is not eligible for blinding or pooling, return the original 5508 // the offset is not eligible for blinding or pooling, return the original
5498 // mem operand 5509 // mem operand
5499 return MemOperand; 5510 return MemOperand;
5500 } 5511 }
5501 5512
5502 } // end of namespace X86Internal 5513 } // end of namespace X86Internal
5503 } // end of namespace Ice 5514 } // end of namespace Ice
5504 5515
5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698