Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1419903002: Subzero: Refactor x86 register definitions to use the alias mechanism. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix assembler unit tests. Fix register names. Code review changes. Rebase Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8664Traits.h ('k') | src/IceTimerTree.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after
580 Node->getInsts().insert(I3, RMW); 580 Node->getInsts().insert(I3, RMW);
581 } 581 }
582 } 582 }
583 if (Func->isVerbose(IceV_RMW)) 583 if (Func->isVerbose(IceV_RMW))
584 Func->getContext()->unlockStr(); 584 Func->getContext()->unlockStr();
585 } 585 }
586 586
587 // Converts a ConstantInteger32 operand into its constant value, or 587 // Converts a ConstantInteger32 operand into its constant value, or
588 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 588 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
591 return Integer->getValue(); 591 return Integer->getValue();
592 return Intrinsics::MemoryOrderInvalid; 592 return Intrinsics::MemoryOrderInvalid;
593 } 593 }
594 594
595 /// Determines whether the dest of a Load instruction can be folded into one of 595 /// Determines whether the dest of a Load instruction can be folded into one of
596 /// the src operands of a 2-operand instruction. This is true as long as the 596 /// the src operands of a 2-operand instruction. This is true as long as the
597 /// load dest matches exactly one of the binary instruction's src operands. 597 /// load dest matches exactly one of the binary instruction's src operands.
598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. 598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.
599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 599 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
600 Operand *&Src0, Operand *&Src1) { 600 Operand *&Src0, Operand *&Src1) {
(...skipping 14 matching lines...) Expand all
615 while (!Context.atEnd()) { 615 while (!Context.atEnd()) {
616 Variable *LoadDest = nullptr; 616 Variable *LoadDest = nullptr;
617 Operand *LoadSrc = nullptr; 617 Operand *LoadSrc = nullptr;
618 Inst *CurInst = Context.getCur(); 618 Inst *CurInst = Context.getCur();
619 Inst *Next = Context.getNextInst(); 619 Inst *Next = Context.getNextInst();
620 // Determine whether the current instruction is a Load instruction or 620 // Determine whether the current instruction is a Load instruction or
621 // equivalent. 621 // equivalent.
622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
623 // An InstLoad always qualifies. 623 // An InstLoad always qualifies.
624 LoadDest = Load->getDest(); 624 LoadDest = Load->getDest();
625 const bool DoLegalize = false; 625 constexpr bool DoLegalize = false;
626 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 626 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
627 LoadDest->getType(), DoLegalize); 627 LoadDest->getType(), DoLegalize);
628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory 629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory
630 // ordering, and can be implemented in a single instruction (i.e., not 630 // ordering, and can be implemented in a single instruction (i.e., not
631 // i64 on x86-32). 631 // i64 on x86-32).
632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
633 if (ID == Intrinsics::AtomicLoad && 633 if (ID == Intrinsics::AtomicLoad &&
634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && 634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
635 Intrinsics::isMemoryOrderValid( 635 Intrinsics::isMemoryOrderValid(
636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
637 LoadDest = Intrin->getDest(); 637 LoadDest = Intrin->getDest();
638 const bool DoLegalize = false; 638 constexpr bool DoLegalize = false;
639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
640 DoLegalize); 640 DoLegalize);
641 } 641 }
642 } 642 }
643 // A Load instruction can be folded into the following instruction only 643 // A Load instruction can be folded into the following instruction only
644 // if the following instruction ends the Load's Dest variable's live 644 // if the following instruction ends the Load's Dest variable's live
645 // range. 645 // range.
646 if (LoadDest && Next && Next->isLastUse(LoadDest)) { 646 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
647 assert(LoadSrc); 647 assert(LoadSrc);
648 Inst *NewInst = nullptr; 648 Inst *NewInst = nullptr;
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
726 // considered live upon function entry. Otherwise it's possible to get 726 // considered live upon function entry. Otherwise it's possible to get
727 // liveness validation errors for saving callee-save registers. 727 // liveness validation errors for saving callee-save registers.
728 Func->addImplicitArg(Reg); 728 Func->addImplicitArg(Reg);
729 // Don't bother tracking the live range of a named physical register. 729 // Don't bother tracking the live range of a named physical register.
730 Reg->setIgnoreLiveness(); 730 Reg->setIgnoreLiveness();
731 } 731 }
732 return Reg; 732 return Reg;
733 } 733 }
734 734
735 template <class Machine> 735 template <class Machine>
736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {
737 return Traits::getRegName(RegNum, Ty); 737 return Traits::getRegName(RegNum);
738 } 738 }
739 739
740 template <class Machine> 740 template <class Machine>
741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
742 if (!BuildDefs::dump()) 742 if (!BuildDefs::dump())
743 return; 743 return;
744 Ostream &Str = Ctx->getStrEmit(); 744 Ostream &Str = Ctx->getStrEmit();
745 if (Var->hasReg()) { 745 if (Var->hasReg()) {
746 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 746 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
747 return; 747 return;
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
790 if (Var->mustHaveReg()) { 790 if (Var->mustHaveReg()) {
791 llvm_unreachable("Infinite-weight Variable has no register assigned"); 791 llvm_unreachable("Infinite-weight Variable has no register assigned");
792 } 792 }
793 int32_t Offset = Var->getStackOffset(); 793 int32_t Offset = Var->getStackOffset();
794 int32_t BaseRegNum = Var->getBaseRegNum(); 794 int32_t BaseRegNum = Var->getBaseRegNum();
795 if (Var->getBaseRegNum() == Variable::NoRegister) { 795 if (Var->getBaseRegNum() == Variable::NoRegister) {
796 BaseRegNum = getFrameOrStackReg(); 796 BaseRegNum = getFrameOrStackReg();
797 if (!hasFramePointer()) 797 if (!hasFramePointer())
798 Offset += getStackAdjustment(); 798 Offset += getStackAdjustment();
799 } 799 }
800 return typename Traits::Address( 800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,
801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset, 801 AssemblerFixup::NoFixup);
802 AssemblerFixup::NoFixup);
803 } 802 }
804 803
805 /// Helper function for addProlog(). 804 /// Helper function for addProlog().
806 /// 805 ///
807 /// This assumes Arg is an argument passed on the stack. This sets the frame 806 /// This assumes Arg is an argument passed on the stack. This sets the frame
808 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
809 /// I64 arg that has been split into Lo and Hi components, it calls itself 808 /// I64 arg that has been split into Lo and Hi components, it calls itself
810 /// recursively on the components, taking care to handle Lo first because of the 809 /// recursively on the components, taking care to handle Lo first because of the
811 /// little-endian architecture. Lastly, this function generates an instruction 810 /// little-endian architecture. Lastly, this function generates an instruction
812 /// to copy Arg into its assigned register if applicable. 811 /// to copy Arg into its assigned register if applicable.
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after
1041 Src1 /= 2; 1040 Src1 /= 2;
1042 } else { 1041 } else {
1043 return false; 1042 return false;
1044 } 1043 }
1045 } 1044 }
1046 // Lea optimization only works for i16 and i32 types, not i8. 1045 // Lea optimization only works for i16 and i32 types, not i8.
1047 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1048 return false; 1047 return false;
1049 // Limit the number of lea/shl operations for a single multiply, to a 1048 // Limit the number of lea/shl operations for a single multiply, to a
1050 // somewhat arbitrary choice of 3. 1049 // somewhat arbitrary choice of 3.
1051 const uint32_t MaxOpsForOptimizedMul = 3; 1050 constexpr uint32_t MaxOpsForOptimizedMul = 3;
1052 if (CountOps > MaxOpsForOptimizedMul) 1051 if (CountOps > MaxOpsForOptimizedMul)
1053 return false; 1052 return false;
1054 _mov(T, Src0); 1053 _mov(T, Src0);
1055 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1054 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1056 for (uint32_t i = 0; i < Count9; ++i) { 1055 for (uint32_t i = 0; i < Count9; ++i) {
1057 const uint16_t Shift = 3; // log2(9-1) 1056 constexpr uint16_t Shift = 3; // log2(9-1)
1058 _lea(T, 1057 _lea(T,
1059 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1060 } 1059 }
1061 for (uint32_t i = 0; i < Count5; ++i) { 1060 for (uint32_t i = 0; i < Count5; ++i) {
1062 const uint16_t Shift = 2; // log2(5-1) 1061 constexpr uint16_t Shift = 2; // log2(5-1)
1063 _lea(T, 1062 _lea(T,
1064 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1065 } 1064 }
1066 for (uint32_t i = 0; i < Count3; ++i) { 1065 for (uint32_t i = 0; i < Count3; ++i) {
1067 const uint16_t Shift = 1; // log2(3-1) 1066 constexpr uint16_t Shift = 1; // log2(3-1)
1068 _lea(T, 1067 _lea(T,
1069 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1070 } 1069 }
1071 if (Count2) { 1070 if (Count2) {
1072 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1071 _shl(T, Ctx->getConstantInt(Ty, Count2));
1073 } 1072 }
1074 if (Src1IsNegative) 1073 if (Src1IsNegative)
1075 _neg(T); 1074 _neg(T);
1076 _mov(Dest, T); 1075 _mov(Dest, T);
1077 return true; 1076 return true;
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
1209 } 1208 }
1210 } else { 1209 } else {
1211 // NON-CONSTANT CASES. 1210 // NON-CONSTANT CASES.
1212 Constant *BitTest = Ctx->getConstantInt32(0x20); 1211 Constant *BitTest = Ctx->getConstantInt32(0x20);
1213 typename Traits::Insts::Label *Label = 1212 typename Traits::Insts::Label *Label =
1214 Traits::Insts::Label::create(Func, this); 1213 Traits::Insts::Label::create(Func, this);
1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1216 // t1:ecx = c.lo & 0xff 1215 // t1:ecx = c.lo & 0xff
1217 // t2 = b.lo 1216 // t2 = b.lo
1218 // t3 = b.hi 1217 // t3 = b.hi
1219 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1218 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1219 _mov(T_1, Src1Lo);
1220 _mov(T_2, Src0Lo); 1220 _mov(T_2, Src0Lo);
1221 _mov(T_3, Src0Hi); 1221 _mov(T_3, Src0Hi);
1222 switch (Op) { 1222 switch (Op) {
1223 default: 1223 default:
1224 assert(0 && "non-shift op"); 1224 assert(0 && "non-shift op");
1225 break; 1225 break;
1226 case InstArithmetic::Shl: { 1226 case InstArithmetic::Shl: {
1227 // a=b<<c ==> 1227 // a=b<<c ==>
1228 // t3 = shld t3, t2, t1 1228 // t3 = shld t3, t2, t1
1229 // t2 = shl t2, t1 1229 // t2 = shl t2, t1
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
1317 } 1317 }
1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1319 // These x86-32 helper-call-involved instructions are lowered in this 1319 // These x86-32 helper-call-involved instructions are lowered in this
1320 // separate switch. This is because loOperand() and hiOperand() may insert 1320 // separate switch. This is because loOperand() and hiOperand() may insert
1321 // redundant instructions for constant blinding and pooling. Such redundant 1321 // redundant instructions for constant blinding and pooling. Such redundant
1322 // instructions will fail liveness analysis under -Om1 setting. And, 1322 // instructions will fail liveness analysis under -Om1 setting. And,
1323 // actually these arguments do not need to be processed with loOperand() 1323 // actually these arguments do not need to be processed with loOperand()
1324 // and hiOperand() to be used. 1324 // and hiOperand() to be used.
1325 switch (Inst->getOp()) { 1325 switch (Inst->getOp()) {
1326 case InstArithmetic::Udiv: { 1326 case InstArithmetic::Udiv: {
1327 const SizeT MaxSrcs = 2; 1327 constexpr SizeT MaxSrcs = 2;
1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1329 Call->addArg(Inst->getSrc(0)); 1329 Call->addArg(Inst->getSrc(0));
1330 Call->addArg(Inst->getSrc(1)); 1330 Call->addArg(Inst->getSrc(1));
1331 lowerCall(Call); 1331 lowerCall(Call);
1332 return; 1332 return;
1333 } 1333 }
1334 case InstArithmetic::Sdiv: { 1334 case InstArithmetic::Sdiv: {
1335 const SizeT MaxSrcs = 2; 1335 constexpr SizeT MaxSrcs = 2;
1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); 1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1337 Call->addArg(Inst->getSrc(0)); 1337 Call->addArg(Inst->getSrc(0));
1338 Call->addArg(Inst->getSrc(1)); 1338 Call->addArg(Inst->getSrc(1));
1339 lowerCall(Call); 1339 lowerCall(Call);
1340 return; 1340 return;
1341 } 1341 }
1342 case InstArithmetic::Urem: { 1342 case InstArithmetic::Urem: {
1343 const SizeT MaxSrcs = 2; 1343 constexpr SizeT MaxSrcs = 2;
1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); 1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1345 Call->addArg(Inst->getSrc(0)); 1345 Call->addArg(Inst->getSrc(0));
1346 Call->addArg(Inst->getSrc(1)); 1346 Call->addArg(Inst->getSrc(1));
1347 lowerCall(Call); 1347 lowerCall(Call);
1348 return; 1348 return;
1349 } 1349 }
1350 case InstArithmetic::Srem: { 1350 case InstArithmetic::Srem: {
1351 const SizeT MaxSrcs = 2; 1351 constexpr SizeT MaxSrcs = 2;
1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); 1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1353 Call->addArg(Inst->getSrc(0)); 1353 Call->addArg(Inst->getSrc(0));
1354 Call->addArg(Inst->getSrc(1)); 1354 Call->addArg(Inst->getSrc(1));
1355 lowerCall(Call); 1355 lowerCall(Call);
1356 return; 1356 return;
1357 } 1357 }
1358 default: 1358 default:
1359 break; 1359 break;
1360 } 1360 }
1361 1361
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
1522 // pmuludq T1, Src1 1522 // pmuludq T1, Src1
1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1524 // pmuludq T2, T3 1524 // pmuludq T2, T3
1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1526 // shufps T1, T2, {0,2,0,2} 1526 // shufps T1, T2, {0,2,0,2}
1527 // pshufd T4, T1, {0,2,1,3} 1527 // pshufd T4, T1, {0,2,1,3}
1528 // movups Dest, T4 1528 // movups Dest, T4
1529 1529
1530 // Mask that directs pshufd to create a vector with entries 1530 // Mask that directs pshufd to create a vector with entries
1531 // Src[1, 0, 3, 0] 1531 // Src[1, 0, 3, 0]
1532 const unsigned Constant1030 = 0x31; 1532 constexpr unsigned Constant1030 = 0x31;
1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); 1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1534 // Mask that directs shufps to create a vector with entries 1534 // Mask that directs shufps to create a vector with entries
1535 // Dest[0, 2], Src[0, 2] 1535 // Dest[0, 2], Src[0, 2]
1536 const unsigned Mask0202 = 0x88; 1536 constexpr unsigned Mask0202 = 0x88;
1537 // Mask that directs pshufd to create a vector with entries 1537 // Mask that directs pshufd to create a vector with entries
1538 // Src[0, 2, 1, 3] 1538 // Src[0, 2, 1, 3]
1539 const unsigned Mask0213 = 0xd8; 1539 constexpr unsigned Mask0213 = 0xd8;
1540 Variable *T1 = makeReg(IceType_v4i32); 1540 Variable *T1 = makeReg(IceType_v4i32);
1541 Variable *T2 = makeReg(IceType_v4i32); 1541 Variable *T2 = makeReg(IceType_v4i32);
1542 Variable *T3 = makeReg(IceType_v4i32); 1542 Variable *T3 = makeReg(IceType_v4i32);
1543 Variable *T4 = makeReg(IceType_v4i32); 1543 Variable *T4 = makeReg(IceType_v4i32);
1544 _movp(T1, Src0); 1544 _movp(T1, Src0);
1545 _pshufd(T2, Src0, Mask1030); 1545 _pshufd(T2, Src0, Mask1030);
1546 _pshufd(T3, Src1, Mask1030); 1546 _pshufd(T3, Src1, Mask1030);
1547 _pmuludq(T1, Src1); 1547 _pmuludq(T1, Src1);
1548 _pmuludq(T2, T3); 1548 _pmuludq(T2, T3);
1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
1624 _mov(T, Src0); 1624 _mov(T, Src0);
1625 _sub(T, Src1); 1625 _sub(T, Src1);
1626 _mov(Dest, T); 1626 _mov(Dest, T);
1627 break; 1627 break;
1628 case InstArithmetic::Mul: 1628 case InstArithmetic::Mul:
1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1630 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1630 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1631 return; 1631 return;
1632 } 1632 }
1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1634 // be in eax. 1634 // be in al.
1635 if (isByteSizedArithType(Dest->getType())) { 1635 if (isByteSizedArithType(Dest->getType())) {
1636 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1636 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1638 _imul(T, Src0 == Src1 ? T : Src1); 1638 _imul(T, Src0 == Src1 ? T : Src1);
1639 _mov(Dest, T); 1639 _mov(Dest, T);
1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1641 T = makeReg(Dest->getType()); 1641 T = makeReg(Dest->getType());
1642 _imul_imm(T, Src0, ImmConst); 1642 _imul_imm(T, Src0, ImmConst);
1643 _mov(Dest, T); 1643 _mov(Dest, T);
1644 } else { 1644 } else {
1645 _mov(T, Src0); 1645 _mov(T, Src0);
1646 _imul(T, Src0 == Src1 ? T : Src1); 1646 _imul(T, Src0 == Src1 ? T : Src1);
1647 _mov(Dest, T); 1647 _mov(Dest, T);
1648 } 1648 }
1649 break; 1649 break;
1650 case InstArithmetic::Shl: 1650 case InstArithmetic::Shl:
1651 _mov(T, Src0); 1651 _mov(T, Src0);
1652 if (!llvm::isa<ConstantInteger32>(Src1)) 1652 if (!llvm::isa<ConstantInteger32>(Src1)) {
1653 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1653 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1654 _mov(Cl, Src1);
1655 Src1 = Cl;
1656 }
1654 _shl(T, Src1); 1657 _shl(T, Src1);
1655 _mov(Dest, T); 1658 _mov(Dest, T);
1656 break; 1659 break;
1657 case InstArithmetic::Lshr: 1660 case InstArithmetic::Lshr:
1658 _mov(T, Src0); 1661 _mov(T, Src0);
1659 if (!llvm::isa<ConstantInteger32>(Src1)) 1662 if (!llvm::isa<ConstantInteger32>(Src1)) {
1660 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1663 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1664 _mov(Cl, Src1);
1665 Src1 = Cl;
1666 }
1661 _shr(T, Src1); 1667 _shr(T, Src1);
1662 _mov(Dest, T); 1668 _mov(Dest, T);
1663 break; 1669 break;
1664 case InstArithmetic::Ashr: 1670 case InstArithmetic::Ashr:
1665 _mov(T, Src0); 1671 _mov(T, Src0);
1666 if (!llvm::isa<ConstantInteger32>(Src1)) 1672 if (!llvm::isa<ConstantInteger32>(Src1)) {
1667 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx); 1673 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);
1674 _mov(Cl, Src1);
1675 Src1 = Cl;
1676 }
1668 _sar(T, Src1); 1677 _sar(T, Src1);
1669 _mov(Dest, T); 1678 _mov(Dest, T);
1670 break; 1679 break;
1671 case InstArithmetic::Udiv: 1680 case InstArithmetic::Udiv:
1672 // div and idiv are the few arithmetic operators that do not allow 1681 // div and idiv are the few arithmetic operators that do not allow
1673 // immediates as the operand. 1682 // immediates as the operand.
1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1683 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1675 if (isByteSizedArithType(Dest->getType())) { 1684 if (isByteSizedArithType(Dest->getType())) {
1676 // For 8-bit unsigned division we need to zero-extend al into ah. A mov 1685 // For 8-bit unsigned division we need to zero-extend al into ah. A mov
1677 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 1686 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64
1678 // assembler refuses to encode %ah (encoding %spl with a REX prefix 1687 // assembler refuses to encode %ah (encoding %spl with a REX prefix
1679 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah 1688 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah
1680 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and 1689 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and
1681 // %d[lh], which means the X86 target lowering (and the register 1690 // %d[lh], which means the X86 target lowering (and the register
1682 // allocator) would have to be aware of this restriction. For now, we 1691 // allocator) would have to be aware of this restriction. For now, we
1683 // simply zero %eax completely, and move the dividend into %al. 1692 // simply zero %eax completely, and move the dividend into %al.
1684 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1693 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1685 Context.insert(InstFakeDef::create(Func, T_eax)); 1694 Context.insert(InstFakeDef::create(Func, T_eax));
1686 _xor(T_eax, T_eax); 1695 _xor(T_eax, T_eax);
1687 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1696 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1688 _div(T, Src1, T); 1697 _div(T, Src1, T);
1689 _mov(Dest, T); 1698 _mov(Dest, T);
1690 Context.insert(InstFakeUse::create(Func, T_eax)); 1699 Context.insert(InstFakeUse::create(Func, T_eax));
1691 } else { 1700 } else {
1692 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1701 Type Ty = Dest->getType();
1693 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1702 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1694 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1703 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1704 switch (Ty) {
1705 default:
1706 llvm_unreachable("Bad type for udiv");
1707 // fallthrough
1708 case IceType_i32:
1709 break;
1710 case IceType_i16:
1711 Eax = Traits::RegisterSet::Reg_ax;
1712 Edx = Traits::RegisterSet::Reg_dx;
1713 break;
1714 }
1715 Constant *Zero = Ctx->getConstantZero(Ty);
1716 _mov(T, Src0, Eax);
1717 _mov(T_edx, Zero, Edx);
1695 _div(T, Src1, T_edx); 1718 _div(T, Src1, T_edx);
1696 _mov(Dest, T); 1719 _mov(Dest, T);
1697 } 1720 }
1698 break; 1721 break;
1699 case InstArithmetic::Sdiv: 1722 case InstArithmetic::Sdiv:
1700 // TODO(stichnot): Enable this after doing better performance and cross 1723 // TODO(stichnot): Enable this after doing better performance and cross
1701 // testing. 1724 // testing.
1702 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1725 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1703 // Optimize division by constant power of 2, but not for Om1 or O0, just 1726 // Optimize division by constant power of 2, but not for Om1 or O0, just
1704 // to keep things simple there. 1727 // to keep things simple there.
(...skipping 21 matching lines...) Expand all
1726 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1749 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1727 _add(T, Src0); 1750 _add(T, Src0);
1728 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1751 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1729 } 1752 }
1730 _mov(Dest, T); 1753 _mov(Dest, T);
1731 return; 1754 return;
1732 } 1755 }
1733 } 1756 }
1734 } 1757 }
1735 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1736 if (isByteSizedArithType(Dest->getType())) { 1759 switch (Type Ty = Dest->getType()) {
1760 default:
1761 llvm_unreachable("Bad type for sdiv");
1762 // fallthrough
1763 case IceType_i32:
1764 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1737 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1765 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1738 _cbwdq(T, T); 1766 break;
1739 _idiv(T, Src1, T); 1767 case IceType_i16:
1740 _mov(Dest, T); 1768 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1741 } else { 1769 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1742 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 1770 break;
1743 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1771 case IceType_i8:
1744 _cbwdq(T_edx, T); 1772 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1745 _idiv(T, Src1, T_edx); 1773 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1746 _mov(Dest, T); 1774 break;
1747 } 1775 }
1776 _cbwdq(T_edx, T);
1777 _idiv(T, Src1, T_edx);
1778 _mov(Dest, T);
1748 break; 1779 break;
1749 case InstArithmetic::Urem: 1780 case InstArithmetic::Urem:
1750 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1781 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1751 if (isByteSizedArithType(Dest->getType())) { 1782 if (isByteSizedArithType(Dest->getType())) {
1752 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1783 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
1753 Context.insert(InstFakeDef::create(Func, T_eax)); 1784 Context.insert(InstFakeDef::create(Func, T_eax));
1754 _xor(T_eax, T_eax); 1785 _xor(T_eax, T_eax);
1755 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1786 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1756 _div(T, Src1, T); 1787 _div(T, Src1, T);
1757 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1788 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1758 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1789 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1759 // this ever becomes a problem we can introduce a pseudo rem instruction 1790 // this ever becomes a problem we can introduce a pseudo rem instruction
1760 // that returns the remainder in %al directly (and uses a mov for copying 1791 // that returns the remainder in %al directly (and uses a mov for copying
1761 // %ah to %al.) 1792 // %ah to %al.)
1762 static constexpr uint8_t AlSizeInBits = 8; 1793 static constexpr uint8_t AlSizeInBits = 8;
1763 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1794 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1764 _mov(Dest, T); 1795 _mov(Dest, T);
1765 Context.insert(InstFakeUse::create(Func, T_eax)); 1796 Context.insert(InstFakeUse::create(Func, T_eax));
1766 } else { 1797 } else {
1767 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1798 Type Ty = Dest->getType();
1768 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); 1799 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1800 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1801 switch (Ty) {
1802 default:
1803 llvm_unreachable("Bad type for urem");
1804 // fallthrough
1805 case IceType_i32:
1806 break;
1807 case IceType_i16:
1808 Eax = Traits::RegisterSet::Reg_ax;
1809 Edx = Traits::RegisterSet::Reg_dx;
1810 break;
1811 }
1812 Constant *Zero = Ctx->getConstantZero(Ty);
1813 T_edx = makeReg(Dest->getType(), Edx);
1769 _mov(T_edx, Zero); 1814 _mov(T_edx, Zero);
1770 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1815 _mov(T, Src0, Eax);
1771 _div(T_edx, Src1, T); 1816 _div(T_edx, Src1, T);
1772 _mov(Dest, T_edx); 1817 _mov(Dest, T_edx);
1773 } 1818 }
1774 break; 1819 break;
1775 case InstArithmetic::Srem: 1820 case InstArithmetic::Srem:
1776 // TODO(stichnot): Enable this after doing better performance and cross 1821 // TODO(stichnot): Enable this after doing better performance and cross
1777 // testing. 1822 // testing.
1778 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1823 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1779 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 1824 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1780 // keep things simple there. 1825 // keep things simple there.
(...skipping 26 matching lines...) Expand all
1807 _add(T, Src0); 1852 _add(T, Src0);
1808 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1853 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1809 _sub(T, Src0); 1854 _sub(T, Src0);
1810 _neg(T); 1855 _neg(T);
1811 _mov(Dest, T); 1856 _mov(Dest, T);
1812 return; 1857 return;
1813 } 1858 }
1814 } 1859 }
1815 } 1860 }
1816 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1861 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1817 if (isByteSizedArithType(Dest->getType())) { 1862 switch (Type Ty = Dest->getType()) {
1818 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1863 default:
1819 // T is %al. 1864 llvm_unreachable("Bad type for srem");
1820 _cbwdq(T, T); 1865 // fallthrough
1821 _idiv(T, Src1, T); 1866 case IceType_i32:
1822 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1867 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1823 Context.insert(InstFakeDef::create(Func, T_eax));
1824 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't
1825 // mov %ah, %al because it would make x86-64 codegen more complicated. If
1826 // this ever becomes a problem we can introduce a pseudo rem instruction
1827 // that returns the remainder in %al directly (and uses a mov for copying
1828 // %ah to %al.)
1829 static constexpr uint8_t AlSizeInBits = 8;
1830 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));
1831 _mov(Dest, T);
1832 Context.insert(InstFakeUse::create(Func, T_eax));
1833 } else {
1834 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1835 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1868 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1836 _cbwdq(T_edx, T); 1869 _cbwdq(T_edx, T);
1837 _idiv(T_edx, Src1, T); 1870 _idiv(T_edx, Src1, T);
1838 _mov(Dest, T_edx); 1871 _mov(Dest, T_edx);
1872 break;
1873 case IceType_i16:
1874 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1875 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1876 _cbwdq(T_edx, T);
1877 _idiv(T_edx, Src1, T);
1878 _mov(Dest, T_edx);
1879 break;
1880 case IceType_i8:
1881 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1882 // TODO(stichnot): Use register ah for T_edx, and remove the _shr().
1883 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
1884 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1885 _cbwdq(T_edx, T);
1886 _idiv(T_edx, Src1, T);
1887 static constexpr uint8_t AlSizeInBits = 8;
1888 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
1889 _mov(Dest, T_edx);
1890 break;
1839 } 1891 }
1840 break; 1892 break;
1841 case InstArithmetic::Fadd: 1893 case InstArithmetic::Fadd:
1842 _mov(T, Src0); 1894 _mov(T, Src0);
1843 _addss(T, Src1); 1895 _addss(T, Src1);
1844 _mov(Dest, T); 1896 _mov(Dest, T);
1845 break; 1897 break;
1846 case InstArithmetic::Fsub: 1898 case InstArithmetic::Fsub:
1847 _mov(T, Src0); 1899 _mov(T, Src0);
1848 _subss(T, Src1); 1900 _subss(T, Src1);
1849 _mov(Dest, T); 1901 _mov(Dest, T);
1850 break; 1902 break;
1851 case InstArithmetic::Fmul: 1903 case InstArithmetic::Fmul:
1852 _mov(T, Src0); 1904 _mov(T, Src0);
1853 _mulss(T, Src0 == Src1 ? T : Src1); 1905 _mulss(T, Src0 == Src1 ? T : Src1);
1854 _mov(Dest, T); 1906 _mov(Dest, T);
1855 break; 1907 break;
1856 case InstArithmetic::Fdiv: 1908 case InstArithmetic::Fdiv:
1857 _mov(T, Src0); 1909 _mov(T, Src0);
1858 _divss(T, Src1); 1910 _divss(T, Src1);
1859 _mov(Dest, T); 1911 _mov(Dest, T);
1860 break; 1912 break;
1861 case InstArithmetic::Frem: { 1913 case InstArithmetic::Frem: {
1862 const SizeT MaxSrcs = 2; 1914 constexpr SizeT MaxSrcs = 2;
1863 Type Ty = Dest->getType(); 1915 Type Ty = Dest->getType();
1864 InstCall *Call = makeHelperCall( 1916 InstCall *Call = makeHelperCall(
1865 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 1917 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1866 Call->addArg(Src0); 1918 Call->addArg(Src0);
1867 Call->addArg(Src1); 1919 Call->addArg(Src1);
1868 return lowerCall(Call); 1920 return lowerCall(Call);
1869 } 1921 }
1870 } 1922 }
1871 } 1923 }
1872 1924
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
2107 if (isVectorType(Dest->getType())) { 2159 if (isVectorType(Dest->getType())) {
2108 assert(Dest->getType() == IceType_v4i32 && 2160 assert(Dest->getType() == IceType_v4i32 &&
2109 Inst->getSrc(0)->getType() == IceType_v4f32); 2161 Inst->getSrc(0)->getType() == IceType_v4f32);
2110 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2162 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2111 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2163 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2112 Src0RM = legalizeToReg(Src0RM); 2164 Src0RM = legalizeToReg(Src0RM);
2113 Variable *T = makeReg(Dest->getType()); 2165 Variable *T = makeReg(Dest->getType());
2114 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2166 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2115 _movp(Dest, T); 2167 _movp(Dest, T);
2116 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2168 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2117 const SizeT MaxSrcs = 1; 2169 constexpr SizeT MaxSrcs = 1;
2118 Type SrcType = Inst->getSrc(0)->getType(); 2170 Type SrcType = Inst->getSrc(0)->getType();
2119 InstCall *Call = 2171 InstCall *Call =
2120 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2172 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2121 : H_fptosi_f64_i64, 2173 : H_fptosi_f64_i64,
2122 Dest, MaxSrcs); 2174 Dest, MaxSrcs);
2123 Call->addArg(Inst->getSrc(0)); 2175 Call->addArg(Inst->getSrc(0));
2124 lowerCall(Call); 2176 lowerCall(Call);
2125 } else { 2177 } else {
2126 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2178 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2127 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2179 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
(...skipping 10 matching lines...) Expand all
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2139 if (Dest->getType() == IceType_i1) 2191 if (Dest->getType() == IceType_i1)
2140 _and(T_2, Ctx->getConstantInt1(1)); 2192 _and(T_2, Ctx->getConstantInt1(1));
2141 _mov(Dest, T_2); 2193 _mov(Dest, T_2);
2142 } 2194 }
2143 break; 2195 break;
2144 case InstCast::Fptoui: 2196 case InstCast::Fptoui:
2145 if (isVectorType(Dest->getType())) { 2197 if (isVectorType(Dest->getType())) {
2146 assert(Dest->getType() == IceType_v4i32 && 2198 assert(Dest->getType() == IceType_v4i32 &&
2147 Inst->getSrc(0)->getType() == IceType_v4f32); 2199 Inst->getSrc(0)->getType() == IceType_v4f32);
2148 const SizeT MaxSrcs = 1; 2200 constexpr SizeT MaxSrcs = 1;
2149 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2150 Call->addArg(Inst->getSrc(0)); 2202 Call->addArg(Inst->getSrc(0));
2151 lowerCall(Call); 2203 lowerCall(Call);
2152 } else if (Dest->getType() == IceType_i64 || 2204 } else if (Dest->getType() == IceType_i64 ||
2153 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { 2205 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
2154 // Use a helper for both x86-32 and x86-64. 2206 // Use a helper for both x86-32 and x86-64.
2155 const SizeT MaxSrcs = 1; 2207 constexpr SizeT MaxSrcs = 1;
2156 Type DestType = Dest->getType(); 2208 Type DestType = Dest->getType();
2157 Type SrcType = Inst->getSrc(0)->getType(); 2209 Type SrcType = Inst->getSrc(0)->getType();
2158 IceString TargetString; 2210 IceString TargetString;
2159 if (Traits::Is64Bit) { 2211 if (Traits::Is64Bit) {
2160 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2212 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2161 : H_fptoui_f64_i64; 2213 : H_fptoui_f64_i64;
2162 } else if (isInt32Asserting32Or64(DestType)) { 2214 } else if (isInt32Asserting32Or64(DestType)) {
2163 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2215 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2164 : H_fptoui_f64_i32; 2216 : H_fptoui_f64_i32;
2165 } else { 2217 } else {
(...skipping 28 matching lines...) Expand all
2194 assert(Dest->getType() == IceType_v4f32 && 2246 assert(Dest->getType() == IceType_v4f32 &&
2195 Inst->getSrc(0)->getType() == IceType_v4i32); 2247 Inst->getSrc(0)->getType() == IceType_v4i32);
2196 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2248 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2197 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2249 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2198 Src0RM = legalizeToReg(Src0RM); 2250 Src0RM = legalizeToReg(Src0RM);
2199 Variable *T = makeReg(Dest->getType()); 2251 Variable *T = makeReg(Dest->getType());
2200 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2252 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2201 _movp(Dest, T); 2253 _movp(Dest, T);
2202 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2254 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2203 // Use a helper for x86-32. 2255 // Use a helper for x86-32.
2204 const SizeT MaxSrcs = 1; 2256 constexpr SizeT MaxSrcs = 1;
2205 Type DestType = Dest->getType(); 2257 Type DestType = Dest->getType();
2206 InstCall *Call = 2258 InstCall *Call =
2207 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2259 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2208 : H_sitofp_i64_f64, 2260 : H_sitofp_i64_f64,
2209 Dest, MaxSrcs); 2261 Dest, MaxSrcs);
2210 // TODO: Call the correct compiler-rt helper function. 2262 // TODO: Call the correct compiler-rt helper function.
2211 Call->addArg(Inst->getSrc(0)); 2263 Call->addArg(Inst->getSrc(0));
2212 lowerCall(Call); 2264 lowerCall(Call);
2213 return; 2265 return;
2214 } else { 2266 } else {
(...skipping 14 matching lines...) Expand all
2229 _movsx(T_1, Src0RM); 2281 _movsx(T_1, Src0RM);
2230 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2282 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2231 _mov(Dest, T_2); 2283 _mov(Dest, T_2);
2232 } 2284 }
2233 break; 2285 break;
2234 case InstCast::Uitofp: { 2286 case InstCast::Uitofp: {
2235 Operand *Src0 = Inst->getSrc(0); 2287 Operand *Src0 = Inst->getSrc(0);
2236 if (isVectorType(Src0->getType())) { 2288 if (isVectorType(Src0->getType())) {
2237 assert(Dest->getType() == IceType_v4f32 && 2289 assert(Dest->getType() == IceType_v4f32 &&
2238 Src0->getType() == IceType_v4i32); 2290 Src0->getType() == IceType_v4i32);
2239 const SizeT MaxSrcs = 1; 2291 constexpr SizeT MaxSrcs = 1;
2240 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2292 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2241 Call->addArg(Src0); 2293 Call->addArg(Src0);
2242 lowerCall(Call); 2294 lowerCall(Call);
2243 } else if (Src0->getType() == IceType_i64 || 2295 } else if (Src0->getType() == IceType_i64 ||
2244 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2296 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2245 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2297 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2246 // x86-32. 2298 // x86-32.
2247 const SizeT MaxSrcs = 1; 2299 constexpr SizeT MaxSrcs = 1;
2248 Type DestType = Dest->getType(); 2300 Type DestType = Dest->getType();
2249 IceString TargetString; 2301 IceString TargetString;
2250 if (isInt32Asserting32Or64(Src0->getType())) { 2302 if (isInt32Asserting32Or64(Src0->getType())) {
2251 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2303 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2252 : H_uitofp_i32_f64; 2304 : H_uitofp_i32_f64;
2253 } else { 2305 } else {
2254 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2306 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2255 : H_uitofp_i64_f64; 2307 : H_uitofp_i64_f64;
2256 } 2308 }
2257 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2309 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
2453 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2505 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2454 ConstantInteger32 *ElementIndex = 2506 ConstantInteger32 *ElementIndex =
2455 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); 2507 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2456 // Only constant indices are allowed in PNaCl IR. 2508 // Only constant indices are allowed in PNaCl IR.
2457 assert(ElementIndex); 2509 assert(ElementIndex);
2458 2510
2459 unsigned Index = ElementIndex->getValue(); 2511 unsigned Index = ElementIndex->getValue();
2460 Type Ty = SourceVectNotLegalized->getType(); 2512 Type Ty = SourceVectNotLegalized->getType();
2461 Type ElementTy = typeElementType(Ty); 2513 Type ElementTy = typeElementType(Ty);
2462 Type InVectorElementTy = Traits::getInVectorElementType(Ty); 2514 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2463 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2464 2515
2465 // TODO(wala): Determine the best lowering sequences for each type. 2516 // TODO(wala): Determine the best lowering sequences for each type.
2466 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2517 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2467 InstructionSet >= Traits::SSE4_1; 2518 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);
2468 if (CanUsePextr && Ty != IceType_v4f32) { 2519 Variable *ExtractedElementR =
2469 // Use pextrb, pextrw, or pextrd. 2520 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);
2521 if (CanUsePextr) {
2522 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper
2523 // bits of the destination register, so we represent this by always
2524 // extracting into an i32 register. The _mov into Dest below will do
2525 // truncation as necessary.
2470 Constant *Mask = Ctx->getConstantInt32(Index); 2526 Constant *Mask = Ctx->getConstantInt32(Index);
2471 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); 2527 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
2472 _pextr(ExtractedElementR, SourceVectR, Mask); 2528 _pextr(ExtractedElementR, SourceVectR, Mask);
2473 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2529 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2474 // Use pshufd and movd/movss. 2530 // Use pshufd and movd/movss.
2475 Variable *T = nullptr; 2531 Variable *T = nullptr;
2476 if (Index) { 2532 if (Index) {
2477 // The shuffle only needs to occur if the element to be extracted is not 2533 // The shuffle only needs to occur if the element to be extracted is not
2478 // at the lowest index. 2534 // at the lowest index.
2479 Constant *Mask = Ctx->getConstantInt32(Index); 2535 Constant *Mask = Ctx->getConstantInt32(Index);
(...skipping 496 matching lines...) Expand 10 before | Expand all | Expand 10 after
2976 // Use insertps, pinsrb, pinsrw, or pinsrd. 3032 // Use insertps, pinsrb, pinsrw, or pinsrd.
2977 Operand *ElementRM = 3033 Operand *ElementRM =
2978 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 3034 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2979 Operand *SourceVectRM = 3035 Operand *SourceVectRM =
2980 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3036 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2981 Variable *T = makeReg(Ty); 3037 Variable *T = makeReg(Ty);
2982 _movp(T, SourceVectRM); 3038 _movp(T, SourceVectRM);
2983 if (Ty == IceType_v4f32) 3039 if (Ty == IceType_v4f32)
2984 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 3040 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
2985 else 3041 else
3042 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source
3043 // operand is a register, it must be a full r32 register like eax, and not
3044 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates
3045 // for the use of r16 and r8 by converting them through getBaseReg(),
3046 // while emitIAS() validates that the original and base register encodings
3047 // are the same. But for an "interior" register like ah, it should
3048 // probably be copied into an r32 via movzx so that the types work out.
2986 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3049 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
2987 _movp(Inst->getDest(), T); 3050 _movp(Inst->getDest(), T);
2988 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 3051 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2989 // Use shufps or movss. 3052 // Use shufps or movss.
2990 Variable *ElementR = nullptr; 3053 Variable *ElementR = nullptr;
2991 Operand *SourceVectRM = 3054 Operand *SourceVectRM =
2992 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3055 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2993 3056
2994 if (InVectorElementTy == IceType_f32) { 3057 if (InVectorElementTy == IceType_f32) {
2995 // ElementR will be in an XMM register since it is floating point. 3058 // ElementR will be in an XMM register since it is floating point.
(...skipping 314 matching lines...) Expand 10 before | Expand all | Expand 10 after
3310 // well-defined value. 3373 // well-defined value.
3311 Operand *Val = legalize(Instr->getArg(0)); 3374 Operand *Val = legalize(Instr->getArg(0));
3312 Operand *FirstVal; 3375 Operand *FirstVal;
3313 Operand *SecondVal = nullptr; 3376 Operand *SecondVal = nullptr;
3314 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3377 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3315 FirstVal = loOperand(Val); 3378 FirstVal = loOperand(Val);
3316 SecondVal = hiOperand(Val); 3379 SecondVal = hiOperand(Val);
3317 } else { 3380 } else {
3318 FirstVal = Val; 3381 FirstVal = Val;
3319 } 3382 }
3320 const bool IsCttz = false; 3383 constexpr bool IsCttz = false;
3321 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3384 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3322 SecondVal); 3385 SecondVal);
3323 return; 3386 return;
3324 } 3387 }
3325 case Intrinsics::Cttz: { 3388 case Intrinsics::Cttz: {
3326 // The "is zero undef" parameter is ignored and we always return a 3389 // The "is zero undef" parameter is ignored and we always return a
3327 // well-defined value. 3390 // well-defined value.
3328 Operand *Val = legalize(Instr->getArg(0)); 3391 Operand *Val = legalize(Instr->getArg(0));
3329 Operand *FirstVal; 3392 Operand *FirstVal;
3330 Operand *SecondVal = nullptr; 3393 Operand *SecondVal = nullptr;
3331 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3394 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3332 FirstVal = hiOperand(Val); 3395 FirstVal = hiOperand(Val);
3333 SecondVal = loOperand(Val); 3396 SecondVal = loOperand(Val);
3334 } else { 3397 } else {
3335 FirstVal = Val; 3398 FirstVal = Val;
3336 } 3399 }
3337 const bool IsCttz = true; 3400 constexpr bool IsCttz = true;
3338 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3401 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3339 SecondVal); 3402 SecondVal);
3340 return; 3403 return;
3341 } 3404 }
3342 case Intrinsics::Fabs: { 3405 case Intrinsics::Fabs: {
3343 Operand *Src = legalize(Instr->getArg(0)); 3406 Operand *Src = legalize(Instr->getArg(0));
3344 Type Ty = Src->getType(); 3407 Type Ty = Src->getType();
3345 Variable *Dest = Instr->getDest(); 3408 Variable *Dest = Instr->getDest();
3346 Variable *T = makeVectorOfFabsMask(Ty); 3409 Variable *T = makeVectorOfFabsMask(Ty);
3347 // The pand instruction operates on an m128 memory operand, so if Src is an 3410 // The pand instruction operates on an m128 memory operand, so if Src is an
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
3425 Func->setError("Should not be lowering UnknownIntrinsic"); 3488 Func->setError("Should not be lowering UnknownIntrinsic");
3426 return; 3489 return;
3427 } 3490 }
3428 return; 3491 return;
3429 } 3492 }
3430 3493
3431 template <class Machine> 3494 template <class Machine>
3432 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev, 3495 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
3433 Operand *Ptr, Operand *Expected, 3496 Operand *Ptr, Operand *Expected,
3434 Operand *Desired) { 3497 Operand *Desired) {
3435 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) { 3498 Type Ty = Expected->getType();
3499 if (!Traits::Is64Bit && Ty == IceType_i64) {
3436 // Reserve the pre-colored registers first, before adding any more 3500 // Reserve the pre-colored registers first, before adding any more
3437 // infinite-weight variables from formMemoryOperand's legalization. 3501 // infinite-weight variables from formMemoryOperand's legalization.
3438 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3502 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3439 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3503 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3440 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3504 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3441 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3505 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3442 _mov(T_eax, loOperand(Expected)); 3506 _mov(T_eax, loOperand(Expected));
3443 _mov(T_edx, hiOperand(Expected)); 3507 _mov(T_edx, hiOperand(Expected));
3444 _mov(T_ebx, loOperand(Desired)); 3508 _mov(T_ebx, loOperand(Desired));
3445 _mov(T_ecx, hiOperand(Desired)); 3509 _mov(T_ecx, hiOperand(Desired));
3446 typename Traits::X86OperandMem *Addr = 3510 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3447 formMemoryOperand(Ptr, Expected->getType()); 3511 constexpr bool Locked = true;
3448 const bool Locked = true;
3449 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3512 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3450 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3513 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3451 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3514 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3452 _mov(DestLo, T_eax); 3515 _mov(DestLo, T_eax);
3453 _mov(DestHi, T_edx); 3516 _mov(DestHi, T_edx);
3454 return; 3517 return;
3455 } 3518 }
3456 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); 3519 int32_t Eax;
3520 switch (Ty) {
3521 default:
3522 llvm_unreachable("Bad type for cmpxchg");
3523 // fallthrough
3524 case IceType_i32:
3525 Eax = Traits::RegisterSet::Reg_eax;
3526 break;
3527 case IceType_i16:
3528 Eax = Traits::RegisterSet::Reg_ax;
3529 break;
3530 case IceType_i8:
3531 Eax = Traits::RegisterSet::Reg_al;
3532 break;
3533 }
3534 Variable *T_eax = makeReg(Ty, Eax);
3457 _mov(T_eax, Expected); 3535 _mov(T_eax, Expected);
3458 typename Traits::X86OperandMem *Addr = 3536 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3459 formMemoryOperand(Ptr, Expected->getType());
3460 Variable *DesiredReg = legalizeToReg(Desired); 3537 Variable *DesiredReg = legalizeToReg(Desired);
3461 const bool Locked = true; 3538 constexpr bool Locked = true;
3462 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3539 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3463 _mov(DestPrev, T_eax); 3540 _mov(DestPrev, T_eax);
3464 } 3541 }
3465 3542
3466 template <class Machine> 3543 template <class Machine>
3467 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3544 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3468 Operand *PtrToMem, 3545 Operand *PtrToMem,
3469 Operand *Expected, 3546 Operand *Expected,
3470 Operand *Desired) { 3547 Operand *Desired) {
3471 if (Ctx->getFlags().getOptLevel() == Opt_m1) 3548 if (Ctx->getFlags().getOptLevel() == Opt_m1)
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3553 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3630 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3554 // All the fall-through paths must set this to true, but use this 3631 // All the fall-through paths must set this to true, but use this
3555 // for asserting. 3632 // for asserting.
3556 NeedsCmpxchg = true; 3633 NeedsCmpxchg = true;
3557 Op_Lo = &TargetX86Base<Machine>::_add; 3634 Op_Lo = &TargetX86Base<Machine>::_add;
3558 Op_Hi = &TargetX86Base<Machine>::_adc; 3635 Op_Hi = &TargetX86Base<Machine>::_adc;
3559 break; 3636 break;
3560 } 3637 }
3561 typename Traits::X86OperandMem *Addr = 3638 typename Traits::X86OperandMem *Addr =
3562 formMemoryOperand(Ptr, Dest->getType()); 3639 formMemoryOperand(Ptr, Dest->getType());
3563 const bool Locked = true; 3640 constexpr bool Locked = true;
3564 Variable *T = nullptr; 3641 Variable *T = nullptr;
3565 _mov(T, Val); 3642 _mov(T, Val);
3566 _xadd(Addr, T, Locked); 3643 _xadd(Addr, T, Locked);
3567 _mov(Dest, T); 3644 _mov(Dest, T);
3568 return; 3645 return;
3569 } 3646 }
3570 case Intrinsics::AtomicSub: { 3647 case Intrinsics::AtomicSub: {
3571 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 3648 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
3572 NeedsCmpxchg = true; 3649 NeedsCmpxchg = true;
3573 Op_Lo = &TargetX86Base<Machine>::_sub; 3650 Op_Lo = &TargetX86Base<Machine>::_sub;
3574 Op_Hi = &TargetX86Base<Machine>::_sbb; 3651 Op_Hi = &TargetX86Base<Machine>::_sbb;
3575 break; 3652 break;
3576 } 3653 }
3577 typename Traits::X86OperandMem *Addr = 3654 typename Traits::X86OperandMem *Addr =
3578 formMemoryOperand(Ptr, Dest->getType()); 3655 formMemoryOperand(Ptr, Dest->getType());
3579 const bool Locked = true; 3656 constexpr bool Locked = true;
3580 Variable *T = nullptr; 3657 Variable *T = nullptr;
3581 _mov(T, Val); 3658 _mov(T, Val);
3582 _neg(T); 3659 _neg(T);
3583 _xadd(Addr, T, Locked); 3660 _xadd(Addr, T, Locked);
3584 _mov(Dest, T); 3661 _mov(Dest, T);
3585 return; 3662 return;
3586 } 3663 }
3587 case Intrinsics::AtomicOr: 3664 case Intrinsics::AtomicOr:
3588 // TODO(jvoung): If Dest is null or dead, then some of these 3665 // TODO(jvoung): If Dest is null or dead, then some of these
3589 // operations do not need an "exchange", but just a locked op. 3666 // operations do not need an "exchange", but just a locked op.
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
3677 _mov(T_ecx, T_edx); 3754 _mov(T_ecx, T_edx);
3678 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3755 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3679 } else { 3756 } else {
3680 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3757 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3681 // It just needs the Val loaded into ebx and ecx. 3758 // It just needs the Val loaded into ebx and ecx.
3682 // That can also be done before the loop. 3759 // That can also be done before the loop.
3683 _mov(T_ebx, loOperand(Val)); 3760 _mov(T_ebx, loOperand(Val));
3684 _mov(T_ecx, hiOperand(Val)); 3761 _mov(T_ecx, hiOperand(Val));
3685 Context.insert(Label); 3762 Context.insert(Label);
3686 } 3763 }
3687 const bool Locked = true; 3764 constexpr bool Locked = true;
3688 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3765 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3689 _br(Traits::Cond::Br_ne, Label); 3766 _br(Traits::Cond::Br_ne, Label);
3690 if (!IsXchg8b) { 3767 if (!IsXchg8b) {
3691 // If Val is a variable, model the extended live range of Val through 3768 // If Val is a variable, model the extended live range of Val through
3692 // the end of the loop, since it will be re-used by the loop. 3769 // the end of the loop, since it will be re-used by the loop.
3693 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3770 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3694 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); 3771 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3695 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); 3772 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3696 Context.insert(InstFakeUse::create(Func, ValLo)); 3773 Context.insert(InstFakeUse::create(Func, ValLo));
3697 Context.insert(InstFakeUse::create(Func, ValHi)); 3774 Context.insert(InstFakeUse::create(Func, ValHi));
3698 } 3775 }
3699 } else { 3776 } else {
3700 // For xchg, the loop is slightly smaller and ebx/ecx are used. 3777 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3701 Context.insert(InstFakeUse::create(Func, T_ebx)); 3778 Context.insert(InstFakeUse::create(Func, T_ebx));
3702 Context.insert(InstFakeUse::create(Func, T_ecx)); 3779 Context.insert(InstFakeUse::create(Func, T_ecx));
3703 } 3780 }
3704 // The address base (if any) is also reused in the loop. 3781 // The address base (if any) is also reused in the loop.
3705 if (Variable *Base = Addr->getBase()) 3782 if (Variable *Base = Addr->getBase())
3706 Context.insert(InstFakeUse::create(Func, Base)); 3783 Context.insert(InstFakeUse::create(Func, Base));
3707 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3784 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3708 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3785 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3709 _mov(DestLo, T_eax); 3786 _mov(DestLo, T_eax);
3710 _mov(DestHi, T_edx); 3787 _mov(DestHi, T_edx);
3711 return; 3788 return;
3712 } 3789 }
3713 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3790 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3714 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); 3791 int32_t Eax;
3792 switch (Ty) {
3793 default:
3794 llvm_unreachable("Bad type for atomicRMW");
3795 // fallthrough
3796 case IceType_i32:
3797 Eax = Traits::RegisterSet::Reg_eax;
3798 break;
3799 case IceType_i16:
3800 Eax = Traits::RegisterSet::Reg_ax;
3801 break;
3802 case IceType_i8:
3803 Eax = Traits::RegisterSet::Reg_al;
3804 break;
3805 }
3806 Variable *T_eax = makeReg(Ty, Eax);
3715 _mov(T_eax, Addr); 3807 _mov(T_eax, Addr);
3716 typename Traits::Insts::Label *Label = 3808 typename Traits::Insts::Label *Label =
3717 Traits::Insts::Label::create(Func, this); 3809 Traits::Insts::Label::create(Func, this);
3718 Context.insert(Label); 3810 Context.insert(Label);
3719 // We want to pick a different register for T than Eax, so don't use 3811 // We want to pick a different register for T than Eax, so don't use
3720 // _mov(T == nullptr, T_eax). 3812 // _mov(T == nullptr, T_eax).
3721 Variable *T = makeReg(Ty); 3813 Variable *T = makeReg(Ty);
3722 _mov(T, T_eax); 3814 _mov(T, T_eax);
3723 (this->*Op_Lo)(T, Val); 3815 (this->*Op_Lo)(T, Val);
3724 const bool Locked = true; 3816 constexpr bool Locked = true;
3725 _cmpxchg(Addr, T_eax, T, Locked); 3817 _cmpxchg(Addr, T_eax, T, Locked);
3726 _br(Traits::Cond::Br_ne, Label); 3818 _br(Traits::Cond::Br_ne, Label);
3727 // If Val is a variable, model the extended live range of Val through 3819 // If Val is a variable, model the extended live range of Val through
3728 // the end of the loop, since it will be re-used by the loop. 3820 // the end of the loop, since it will be re-used by the loop.
3729 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3821 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3730 Context.insert(InstFakeUse::create(Func, ValVar)); 3822 Context.insert(InstFakeUse::create(Func, ValVar));
3731 } 3823 }
3732 // The address base (if any) is also reused in the loop. 3824 // The address base (if any) is also reused in the loop.
3733 if (Variable *Base = Addr->getBase()) 3825 if (Variable *Base = Addr->getBase())
3734 Context.insert(InstFakeUse::create(Func, Base)); 3826 Context.insert(InstFakeUse::create(Func, Base));
(...skipping 1476 matching lines...) Expand 10 before | Expand all | Expand 10 after
5211 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 5303 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
5212 Ty == IceType_v16i8); 5304 Ty == IceType_v16i8);
5213 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 5305 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
5214 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 5306 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
5215 SizeT Shift = 5307 SizeT Shift =
5216 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 5308 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
5217 _psll(Reg, Ctx->getConstantInt8(Shift)); 5309 _psll(Reg, Ctx->getConstantInt8(Shift));
5218 return Reg; 5310 return Reg;
5219 } else { 5311 } else {
5220 // SSE has no left shift operation for vectors of 8 bit integers. 5312 // SSE has no left shift operation for vectors of 8 bit integers.
5221 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 5313 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
5222 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 5314 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
5223 Variable *Reg = makeReg(Ty, RegNum); 5315 Variable *Reg = makeReg(Ty, RegNum);
5224 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 5316 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
5225 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 5317 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
5226 return Reg; 5318 return Reg;
5227 } 5319 }
5228 } 5320 }
5229 5321
5230 /// Construct a mask in a register that can be and'ed with a floating-point 5322 /// Construct a mask in a register that can be and'ed with a floating-point
5231 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 5323 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
(...skipping 13 matching lines...) Expand all
5245 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5337 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5246 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 5338 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
5247 uint32_t Offset) { 5339 uint32_t Offset) {
5248 // Ensure that Loc is a stack slot. 5340 // Ensure that Loc is a stack slot.
5249 assert(Slot->mustNotHaveReg()); 5341 assert(Slot->mustNotHaveReg());
5250 assert(Slot->getRegNum() == Variable::NoRegister); 5342 assert(Slot->getRegNum() == Variable::NoRegister);
5251 // Compute the location of Loc in memory. 5343 // Compute the location of Loc in memory.
5252 // TODO(wala,stichnot): lea should not 5344 // TODO(wala,stichnot): lea should not
5253 // be required. The address of the stack slot is known at compile time 5345 // be required. The address of the stack slot is known at compile time
5254 // (although not until after addProlog()). 5346 // (although not until after addProlog()).
5255 const Type PointerType = IceType_i32; 5347 constexpr Type PointerType = IceType_i32;
5256 Variable *Loc = makeReg(PointerType); 5348 Variable *Loc = makeReg(PointerType);
5257 _lea(Loc, Slot); 5349 _lea(Loc, Slot);
5258 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5350 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5259 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5351 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5260 } 5352 }
5261 5353
5262 /// Helper for legalize() to emit the right code to lower an operand to a 5354 /// Helper for legalize() to emit the right code to lower an operand to a
5263 /// register of the appropriate type. 5355 /// register of the appropriate type.
5264 template <class Machine> 5356 template <class Machine>
5265 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5357 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
5298 if (Subst->mustHaveReg() && !Subst->hasReg()) { 5390 if (Subst->mustHaveReg() && !Subst->hasReg()) {
5299 // At this point we know the substitution will have a register. 5391 // At this point we know the substitution will have a register.
5300 if (From->getType() == Subst->getType()) { 5392 if (From->getType() == Subst->getType()) {
5301 // At this point we know the substitution's register is compatible. 5393 // At this point we know the substitution's register is compatible.
5302 return Subst; 5394 return Subst;
5303 } 5395 }
5304 } 5396 }
5305 } 5397 }
5306 } 5398 }
5307 5399
5308 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 5400 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5309 // Before doing anything with a Mem operand, we need to ensure that the 5401 // Before doing anything with a Mem operand, we need to ensure that the
5310 // Base and Index components are in physical registers. 5402 // Base and Index components are in physical registers.
5311 Variable *Base = Mem->getBase(); 5403 Variable *Base = Mem->getBase();
5312 Variable *Index = Mem->getIndex(); 5404 Variable *Index = Mem->getIndex();
5313 Variable *RegBase = nullptr; 5405 Variable *RegBase = nullptr;
5314 Variable *RegIndex = nullptr; 5406 Variable *RegIndex = nullptr;
5315 if (Base) { 5407 if (Base) {
5316 RegBase = legalizeToReg(Base); 5408 RegBase = legalizeToReg(Base);
5317 } 5409 }
5318 if (Index) { 5410 if (Index) {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
5376 // Immediate specifically not allowed 5468 // Immediate specifically not allowed
5377 NeedsReg = true; 5469 NeedsReg = true;
5378 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5470 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5379 // On x86, FP constants are lowered to mem operands. 5471 // On x86, FP constants are lowered to mem operands.
5380 NeedsReg = true; 5472 NeedsReg = true;
5381 if (NeedsReg) { 5473 if (NeedsReg) {
5382 From = copyToReg(From, RegNum); 5474 From = copyToReg(From, RegNum);
5383 } 5475 }
5384 return From; 5476 return From;
5385 } 5477 }
5386 if (auto Var = llvm::dyn_cast<Variable>(From)) { 5478 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5387 // Check if the variable is guaranteed a physical register. This can happen 5479 // Check if the variable is guaranteed a physical register. This can happen
5388 // either when the variable is pre-colored or when it is assigned infinite 5480 // either when the variable is pre-colored or when it is assigned infinite
5389 // weight. 5481 // weight.
5390 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5482 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5391 // We need a new physical register for the operand if: 5483 // We need a new physical register for the operand if:
5392 // Mem is not allowed and Var isn't guaranteed a physical 5484 // Mem is not allowed and Var isn't guaranteed a physical
5393 // register, or 5485 // register, or
5394 // RegNum is required and Var->getRegNum() doesn't match. 5486 // RegNum is required and Var->getRegNum() doesn't match.
5395 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5487 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5396 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5488 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
5631 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); 5723 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
5632 Immediate->setShouldBePooled(true); 5724 Immediate->setShouldBePooled(true);
5633 // if we have already assigned a phy register, we must come from 5725 // if we have already assigned a phy register, we must come from
5634 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the 5726 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
5635 // assigned register as this assignment is that start of its use-def 5727 // assigned register as this assignment is that start of its use-def
5636 // chain. So we add RegNum argument here. 5728 // chain. So we add RegNum argument here.
5637 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5729 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5638 IceString Label; 5730 IceString Label;
5639 llvm::raw_string_ostream Label_stream(Label); 5731 llvm::raw_string_ostream Label_stream(Label);
5640 Immediate->emitPoolLabel(Label_stream, Ctx); 5732 Immediate->emitPoolLabel(Label_stream, Ctx);
5641 const RelocOffsetT Offset = 0; 5733 constexpr RelocOffsetT Offset = 0;
5642 const bool SuppressMangling = true; 5734 constexpr bool SuppressMangling = true;
5643 Constant *Symbol = 5735 Constant *Symbol =
5644 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5736 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5645 typename Traits::X86OperandMem *MemOperand = 5737 typename Traits::X86OperandMem *MemOperand =
5646 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr, 5738 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
5647 Symbol); 5739 Symbol);
5648 _mov(Reg, MemOperand); 5740 _mov(Reg, MemOperand);
5649 return Reg; 5741 return Reg;
5650 } 5742 }
5651 assert("Unsupported -randomize-pool-immediates option" && false); 5743 assert("Unsupported -randomize-pool-immediates option" && false);
5652 } 5744 }
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
5728 // phi lowering, we should not ask for new physical registers in 5820 // phi lowering, we should not ask for new physical registers in
5729 // general. However, if we do meet Memory Operand during phi lowering, 5821 // general. However, if we do meet Memory Operand during phi lowering,
5730 // we should not blind or pool the immediates for now. 5822 // we should not blind or pool the immediates for now.
5731 if (RegNum != Variable::NoRegister) 5823 if (RegNum != Variable::NoRegister)
5732 return MemOperand; 5824 return MemOperand;
5733 Variable *RegTemp = makeReg(IceType_i32); 5825 Variable *RegTemp = makeReg(IceType_i32);
5734 IceString Label; 5826 IceString Label;
5735 llvm::raw_string_ostream Label_stream(Label); 5827 llvm::raw_string_ostream Label_stream(Label);
5736 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx); 5828 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);
5737 MemOperand->getOffset()->setShouldBePooled(true); 5829 MemOperand->getOffset()->setShouldBePooled(true);
5738 const RelocOffsetT SymOffset = 0; 5830 constexpr RelocOffsetT SymOffset = 0;
5739 bool SuppressMangling = true; 5831 constexpr bool SuppressMangling = true;
5740 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5832 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
5741 SuppressMangling); 5833 SuppressMangling);
5742 typename Traits::X86OperandMem *SymbolOperand = 5834 typename Traits::X86OperandMem *SymbolOperand =
5743 Traits::X86OperandMem::create( 5835 Traits::X86OperandMem::create(
5744 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); 5836 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
5745 _mov(RegTemp, SymbolOperand); 5837 _mov(RegTemp, SymbolOperand);
5746 // If we have a base variable here, we should add the lea instruction 5838 // If we have a base variable here, we should add the lea instruction
5747 // to add the value of the base variable to RegTemp. If there is no 5839 // to add the value of the base variable to RegTemp. If there is no
5748 // base variable, we won't need this lea instruction. 5840 // base variable, we won't need this lea instruction.
5749 if (MemOperand->getBase()) { 5841 if (MemOperand->getBase()) {
(...skipping 15 matching lines...) Expand all
5765 } 5857 }
5766 // the offset is not eligible for blinding or pooling, return the original 5858 // the offset is not eligible for blinding or pooling, return the original
5767 // mem operand 5859 // mem operand
5768 return MemOperand; 5860 return MemOperand;
5769 } 5861 }
5770 5862
5771 } // end of namespace X86Internal 5863 } // end of namespace X86Internal
5772 } // end of namespace Ice 5864 } // end of namespace Ice
5773 5865
5774 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5866 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8664Traits.h ('k') | src/IceTimerTree.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698