Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(580)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1472623002: Unify alloca, outgoing arg, and prolog construction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review fixes. Also removed StackAdjustment. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/assembler/x86/sandboxing.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 285 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 } 296 }
297 297
298 template <class Machine> void TargetX86Base<Machine>::staticInit() { 298 template <class Machine> void TargetX86Base<Machine>::staticInit() {
299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs); 299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);
300 } 300 }
301 301
302 template <class Machine> void TargetX86Base<Machine>::translateO2() { 302 template <class Machine> void TargetX86Base<Machine>::translateO2() {
303 TimerMarker T(TimerStack::TT_O2, Func); 303 TimerMarker T(TimerStack::TT_O2, Func);
304 304
305 genTargetHelperCalls(); 305 genTargetHelperCalls();
306 Func->dump("After target helper call insertion");
306 307
307 // Merge Alloca instructions, and lay out the stack. 308 // Merge Alloca instructions, and lay out the stack.
308 static constexpr bool SortAndCombineAllocas = true; 309 static constexpr bool SortAndCombineAllocas = true;
309 Func->processAllocas(SortAndCombineAllocas); 310 Func->processAllocas(SortAndCombineAllocas);
310 Func->dump("After Alloca processing"); 311 Func->dump("After Alloca processing");
311 312
312 if (!Ctx->getFlags().getPhiEdgeSplit()) { 313 if (!Ctx->getFlags().getPhiEdgeSplit()) {
313 // Lower Phi instructions. 314 // Lower Phi instructions.
314 Func->placePhiLoads(); 315 Func->placePhiLoads();
315 if (Func->hasError()) 316 if (Func->hasError())
(...skipping 454 matching lines...) Expand 10 before | Expand all | Expand 10 after
770 return; 771 return;
771 Ostream &Str = Ctx->getStrEmit(); 772 Ostream &Str = Ctx->getStrEmit();
772 if (Var->hasReg()) { 773 if (Var->hasReg()) {
773 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 774 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
774 return; 775 return;
775 } 776 }
776 if (Var->mustHaveReg()) { 777 if (Var->mustHaveReg()) {
777 llvm_unreachable("Infinite-weight Variable has no register assigned"); 778 llvm_unreachable("Infinite-weight Variable has no register assigned");
778 } 779 }
779 const int32_t Offset = Var->getStackOffset(); 780 const int32_t Offset = Var->getStackOffset();
780 int32_t OffsetAdj = 0;
781 int32_t BaseRegNum = Var->getBaseRegNum(); 781 int32_t BaseRegNum = Var->getBaseRegNum();
782 if (BaseRegNum == Variable::NoRegister) { 782 if (BaseRegNum == Variable::NoRegister)
783 BaseRegNum = getFrameOrStackReg(); 783 BaseRegNum = getFrameOrStackReg();
784 if (!hasFramePointer()) 784 // Print in the form "Offset(%reg)", taking care that:
785 OffsetAdj = getStackAdjustment();
786 }
787 // Print in the form "OffsetAdj+Offset(%reg)", taking care that:
788 // - OffsetAdj may be 0
789 // - Offset is never printed when it is 0 785 // - Offset is never printed when it is 0
790 // - Offset may be positive or symbolic, so a "+" might be needed
791 786
792 // Only print nonzero OffsetAdj.
793 if (OffsetAdj) {
794 Str << OffsetAdj;
795 }
796 const bool DecorateAsm = Func->getContext()->getFlags().getDecorateAsm(); 787 const bool DecorateAsm = Func->getContext()->getFlags().getDecorateAsm();
797 // Only print Offset when it is nonzero, regardless of DecorateAsm. 788 // Only print Offset when it is nonzero, regardless of DecorateAsm.
798 if (Offset) { 789 if (Offset) {
799 if (OffsetAdj && (DecorateAsm || Offset > 0)) {
800 Str << "+";
801 }
802 if (DecorateAsm) { 790 if (DecorateAsm) {
803 Str << Var->getSymbolicStackOffset(Func); 791 Str << Var->getSymbolicStackOffset(Func);
804 } else { 792 } else {
805 Str << Offset; 793 Str << Offset;
806 } 794 }
807 } 795 }
808 const Type FrameSPTy = Traits::WordType; 796 const Type FrameSPTy = Traits::WordType;
809 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")"; 797 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
810 } 798 }
811 799
812 template <class Machine> 800 template <class Machine>
813 typename TargetX86Base<Machine>::Traits::Address 801 typename TargetX86Base<Machine>::Traits::Address
814 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { 802 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
815 if (Var->hasReg()) 803 if (Var->hasReg())
816 llvm_unreachable("Stack Variable has a register assigned"); 804 llvm_unreachable("Stack Variable has a register assigned");
817 if (Var->mustHaveReg()) { 805 if (Var->mustHaveReg()) {
818 llvm_unreachable("Infinite-weight Variable has no register assigned"); 806 llvm_unreachable("Infinite-weight Variable has no register assigned");
819 } 807 }
820 int32_t Offset = Var->getStackOffset(); 808 int32_t Offset = Var->getStackOffset();
821 int32_t BaseRegNum = Var->getBaseRegNum(); 809 int32_t BaseRegNum = Var->getBaseRegNum();
822 if (Var->getBaseRegNum() == Variable::NoRegister) { 810 if (Var->getBaseRegNum() == Variable::NoRegister)
823 BaseRegNum = getFrameOrStackReg(); 811 BaseRegNum = getFrameOrStackReg();
824 if (!hasFramePointer())
825 Offset += getStackAdjustment();
826 }
827 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset, 812 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,
828 AssemblerFixup::NoFixup); 813 AssemblerFixup::NoFixup);
829 } 814 }
830 815
831 /// Helper function for addProlog(). 816 /// Helper function for addProlog().
832 /// 817 ///
833 /// This assumes Arg is an argument passed on the stack. This sets the frame 818 /// This assumes Arg is an argument passed on the stack. This sets the frame
834 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an 819 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
835 /// I64 arg that has been split into Lo and Hi components, it calls itself 820 /// I64 arg that has been split into Lo and Hi components, it calls itself
836 /// recursively on the components, taking care to handle Lo first because of the 821 /// recursively on the components, taking care to handle Lo first because of the
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
951 936
952 template <class Machine> 937 template <class Machine>
953 llvm::SmallBitVector 938 llvm::SmallBitVector
954 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 939 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
955 RegSetMask Exclude) const { 940 RegSetMask Exclude) const {
956 return Traits::getRegisterSet(Include, Exclude); 941 return Traits::getRegisterSet(Include, Exclude);
957 } 942 }
958 943
959 template <class Machine> 944 template <class Machine>
960 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 945 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
961 if (!Inst->getKnownFrameOffset())
962 setHasFramePointer();
963 // Conservatively require the stack to be aligned. Some stack adjustment 946 // Conservatively require the stack to be aligned. Some stack adjustment
964 // operations implemented below assume that the stack is aligned before the 947 // operations implemented below assume that the stack is aligned before the
965 // alloca. All the alloca code ensures that the stack alignment is preserved 948 // alloca. All the alloca code ensures that the stack alignment is preserved
966 // after the alloca. The stack alignment restriction can be relaxed in some 949 // after the alloca. The stack alignment restriction can be relaxed in some
967 // cases. 950 // cases.
968 NeedsStackAlignment = true; 951 NeedsStackAlignment = true;
969 952
970 // TODO(stichnot): minimize the number of adjustments of esp, etc.
971 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
972 Operand *TotalSize = legalize(Inst->getSizeInBytes());
973 Variable *Dest = Inst->getDest();
974 uint32_t AlignmentParam = Inst->getAlignInBytes();
975 // For default align=0, set it to the real value 1, to avoid any 953 // For default align=0, set it to the real value 1, to avoid any
976 // bit-manipulation problems below. 954 // bit-manipulation problems below.
977 AlignmentParam = std::max(AlignmentParam, 1u); 955 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
978 956
979 // LLVM enforces power of 2 alignment. 957 // LLVM enforces power of 2 alignment.
980 assert(llvm::isPowerOf2_32(AlignmentParam)); 958 assert(llvm::isPowerOf2_32(AlignmentParam));
981 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 959 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
982 960
983 uint32_t Alignment = 961 const uint32_t Alignment =
984 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 962 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
985 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { 963 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
964 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
965 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
966 const bool UseFramePointer =
967 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
968
969 if (UseFramePointer)
986 setHasFramePointer(); 970 setHasFramePointer();
971
972 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
973 if (OverAligned) {
987 _and(esp, Ctx->getConstantInt32(-Alignment)); 974 _and(esp, Ctx->getConstantInt32(-Alignment));
988 } 975 }
976
977 Variable *Dest = Inst->getDest();
978 Operand *TotalSize = legalize(Inst->getSizeInBytes());
979
989 if (const auto *ConstantTotalSize = 980 if (const auto *ConstantTotalSize =
990 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 981 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
991 uint32_t Value = ConstantTotalSize->getValue(); 982 const uint32_t Value =
992 Value = Utils::applyAlignment(Value, Alignment); 983 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
993 if (Inst->getKnownFrameOffset()) { 984 if (!UseFramePointer) {
994 _adjust_stack(Value); 985 // If we don't need a Frame Pointer, this alloca has a known offset to the
986 // stack pointer. We don't need to adjust the stack pointer, nor assign any
987 // value to Dest, as Dest is rematerializable.
988 assert(Dest->isRematerializable());
995 FixedAllocaSizeBytes += Value; 989 FixedAllocaSizeBytes += Value;
990 Context.insert(InstFakeDef::create(Func, Dest));
996 } else { 991 } else {
997 _sub(esp, Ctx->getConstantInt32(Value)); 992 _sub(esp, Ctx->getConstantInt32(Value));
998 } 993 }
999 } else { 994 } else {
1000 // Non-constant sizes need to be adjusted to the next highest multiple of 995 // Non-constant sizes need to be adjusted to the next highest multiple of
1001 // the required alignment at runtime. 996 // the required alignment at runtime.
1002 Variable *T = makeReg(IceType_i32); 997 Variable *T = makeReg(IceType_i32);
1003 _mov(T, TotalSize); 998 _mov(T, TotalSize);
1004 _add(T, Ctx->getConstantInt32(Alignment - 1)); 999 _add(T, Ctx->getConstantInt32(Alignment - 1));
1005 _and(T, Ctx->getConstantInt32(-Alignment)); 1000 _and(T, Ctx->getConstantInt32(-Alignment));
1006 _sub(esp, T); 1001 _sub(esp, T);
1007 } 1002 }
1008 _mov(Dest, esp); 1003 // Add enough to the returned address to account for the out args area.
1004 uint32_t OutArgsSize = maxOutArgsSizeBytes();
1005 if (OutArgsSize > 0) {
1006 Variable *T = makeReg(IceType_i32);
1007 typename Traits::X86OperandMem *CalculateOperand =
1008 Traits::X86OperandMem::create(
1009 Func, IceType_i32, esp,
1010 Ctx->getConstantInt(IceType_i32, OutArgsSize));
1011 _lea(T, CalculateOperand);
1012 _mov(Dest, T);
1013 } else {
1014 _mov(Dest, esp);
1015 }
1009 } 1016 }
1010 1017
1011 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1018 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1012 /// narrower) for certain constants. The lea instruction can be used to multiply 1019 /// narrower) for certain constants. The lea instruction can be used to multiply
1013 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of 1020 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
1014 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1021 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1015 /// lea-based multiplies by 5, combined with left-shifting by 2. 1022 /// lea-based multiplies by 5, combined with left-shifting by 2.
1016 template <class Machine> 1023 template <class Machine>
1017 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1024 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1018 int32_t Src1) { 1025 int32_t Src1) {
(...skipping 329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1348 (void)SwapCount; 1355 (void)SwapCount;
1349 } 1356 }
1350 if (!Traits::Is64Bit && Ty == IceType_i64) { 1357 if (!Traits::Is64Bit && Ty == IceType_i64) {
1351 // These x86-32 helper-call-involved instructions are lowered in this 1358 // These x86-32 helper-call-involved instructions are lowered in this
1352 // separate switch. This is because loOperand() and hiOperand() may insert 1359 // separate switch. This is because loOperand() and hiOperand() may insert
1353 // redundant instructions for constant blinding and pooling. Such redundant 1360 // redundant instructions for constant blinding and pooling. Such redundant
1354 // instructions will fail liveness analysis under -Om1 setting. And, 1361 // instructions will fail liveness analysis under -Om1 setting. And,
1355 // actually these arguments do not need to be processed with loOperand() 1362 // actually these arguments do not need to be processed with loOperand()
1356 // and hiOperand() to be used. 1363 // and hiOperand() to be used.
1357 switch (Inst->getOp()) { 1364 switch (Inst->getOp()) {
1358 case InstArithmetic::Udiv: { 1365 case InstArithmetic::Udiv:
1359 constexpr SizeT MaxSrcs = 2; 1366 case InstArithmetic::Sdiv:
1360 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1367 case InstArithmetic::Urem:
1361 Call->addArg(Inst->getSrc(0)); 1368 case InstArithmetic::Srem:
1362 Call->addArg(Inst->getSrc(1)); 1369 llvm::report_fatal_error("Helper call was expected");
1363 lowerCall(Call);
1364 return; 1370 return;
1365 }
1366 case InstArithmetic::Sdiv: {
1367 constexpr SizeT MaxSrcs = 2;
1368 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
1369 Call->addArg(Inst->getSrc(0));
1370 Call->addArg(Inst->getSrc(1));
1371 lowerCall(Call);
1372 return;
1373 }
1374 case InstArithmetic::Urem: {
1375 constexpr SizeT MaxSrcs = 2;
1376 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
1377 Call->addArg(Inst->getSrc(0));
1378 Call->addArg(Inst->getSrc(1));
1379 lowerCall(Call);
1380 return;
1381 }
1382 case InstArithmetic::Srem: {
1383 constexpr SizeT MaxSrcs = 2;
1384 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
1385 Call->addArg(Inst->getSrc(0));
1386 Call->addArg(Inst->getSrc(1));
1387 lowerCall(Call);
1388 return;
1389 }
1390 default: 1371 default:
1391 break; 1372 break;
1392 } 1373 }
1393 1374
1394 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1375 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1395 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1376 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1396 Operand *Src0Lo = loOperand(Src0); 1377 Operand *Src0Lo = loOperand(Src0);
1397 Operand *Src0Hi = hiOperand(Src0); 1378 Operand *Src0Hi = hiOperand(Src0);
1398 Operand *Src1Lo = loOperand(Src1); 1379 Operand *Src1Lo = loOperand(Src1);
1399 Operand *Src1Hi = hiOperand(Src1); 1380 Operand *Src1Hi = hiOperand(Src1);
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
1574 Variable *T4 = makeReg(IceType_v4i32); 1555 Variable *T4 = makeReg(IceType_v4i32);
1575 _movp(T1, Src0); 1556 _movp(T1, Src0);
1576 _pshufd(T2, Src0, Mask1030); 1557 _pshufd(T2, Src0, Mask1030);
1577 _pshufd(T3, Src1, Mask1030); 1558 _pshufd(T3, Src1, Mask1030);
1578 _pmuludq(T1, Src1); 1559 _pmuludq(T1, Src1);
1579 _pmuludq(T2, T3); 1560 _pmuludq(T2, T3);
1580 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1561 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1581 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); 1562 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1582 _movp(Dest, T4); 1563 _movp(Dest, T4);
1583 } else if (Ty == IceType_v16i8) { 1564 } else if (Ty == IceType_v16i8) {
1584 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1565 llvm::report_fatal_error("Scalarized operation was expected");
1585 } else { 1566 } else {
1586 llvm::report_fatal_error("Invalid vector multiply type"); 1567 llvm::report_fatal_error("Invalid vector multiply type");
1587 } 1568 }
1588 } break; 1569 } break;
1589 case InstArithmetic::Shl: 1570 case InstArithmetic::Shl:
1590 case InstArithmetic::Lshr: 1571 case InstArithmetic::Lshr:
1591 case InstArithmetic::Ashr: 1572 case InstArithmetic::Ashr:
1592 case InstArithmetic::Udiv: 1573 case InstArithmetic::Udiv:
1593 case InstArithmetic::Urem: 1574 case InstArithmetic::Urem:
1594 case InstArithmetic::Sdiv: 1575 case InstArithmetic::Sdiv:
1595 case InstArithmetic::Srem: 1576 case InstArithmetic::Srem:
1596 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1577 llvm::report_fatal_error("Scalarized operation was expected");
1597 break; 1578 break;
1598 case InstArithmetic::Fadd: { 1579 case InstArithmetic::Fadd: {
1599 Variable *T = makeReg(Ty); 1580 Variable *T = makeReg(Ty);
1600 _movp(T, Src0); 1581 _movp(T, Src0);
1601 _addps(T, Src1); 1582 _addps(T, Src1);
1602 _movp(Dest, T); 1583 _movp(Dest, T);
1603 } break; 1584 } break;
1604 case InstArithmetic::Fsub: { 1585 case InstArithmetic::Fsub: {
1605 Variable *T = makeReg(Ty); 1586 Variable *T = makeReg(Ty);
1606 _movp(T, Src0); 1587 _movp(T, Src0);
1607 _subps(T, Src1); 1588 _subps(T, Src1);
1608 _movp(Dest, T); 1589 _movp(Dest, T);
1609 } break; 1590 } break;
1610 case InstArithmetic::Fmul: { 1591 case InstArithmetic::Fmul: {
1611 Variable *T = makeReg(Ty); 1592 Variable *T = makeReg(Ty);
1612 _movp(T, Src0); 1593 _movp(T, Src0);
1613 _mulps(T, Src0 == Src1 ? T : Src1); 1594 _mulps(T, Src0 == Src1 ? T : Src1);
1614 _movp(Dest, T); 1595 _movp(Dest, T);
1615 } break; 1596 } break;
1616 case InstArithmetic::Fdiv: { 1597 case InstArithmetic::Fdiv: {
1617 Variable *T = makeReg(Ty); 1598 Variable *T = makeReg(Ty);
1618 _movp(T, Src0); 1599 _movp(T, Src0);
1619 _divps(T, Src1); 1600 _divps(T, Src1);
1620 _movp(Dest, T); 1601 _movp(Dest, T);
1621 } break; 1602 } break;
1622 case InstArithmetic::Frem: 1603 case InstArithmetic::Frem:
1623 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1604 llvm::report_fatal_error("Scalarized operation was expected");
1624 break; 1605 break;
1625 } 1606 }
1626 return; 1607 return;
1627 } 1608 }
1628 Variable *T_edx = nullptr; 1609 Variable *T_edx = nullptr;
1629 Variable *T = nullptr; 1610 Variable *T = nullptr;
1630 switch (Inst->getOp()) { 1611 switch (Inst->getOp()) {
1631 case InstArithmetic::_num: 1612 case InstArithmetic::_num:
1632 llvm_unreachable("Unknown arithmetic operator"); 1613 llvm_unreachable("Unknown arithmetic operator");
1633 break; 1614 break;
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
1884 case InstArithmetic::Fmul: 1865 case InstArithmetic::Fmul:
1885 _mov(T, Src0); 1866 _mov(T, Src0);
1886 _mulss(T, Src0 == Src1 ? T : Src1); 1867 _mulss(T, Src0 == Src1 ? T : Src1);
1887 _mov(Dest, T); 1868 _mov(Dest, T);
1888 break; 1869 break;
1889 case InstArithmetic::Fdiv: 1870 case InstArithmetic::Fdiv:
1890 _mov(T, Src0); 1871 _mov(T, Src0);
1891 _divss(T, Src1); 1872 _divss(T, Src1);
1892 _mov(Dest, T); 1873 _mov(Dest, T);
1893 break; 1874 break;
1894 case InstArithmetic::Frem: { 1875 case InstArithmetic::Frem:
1895 constexpr SizeT MaxSrcs = 2; 1876 llvm::report_fatal_error("Helper call was expected");
1896 InstCall *Call = makeHelperCall( 1877 break;
1897 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1898 Call->addArg(Src0);
1899 Call->addArg(Src1);
1900 return lowerCall(Call);
1901 }
1902 } 1878 }
1903 } 1879 }
1904 1880
1905 template <class Machine> 1881 template <class Machine>
1906 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { 1882 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
1907 Variable *Dest = Inst->getDest(); 1883 Variable *Dest = Inst->getDest();
1908 if (Dest->isRematerializable()) { 1884 if (Dest->isRematerializable()) {
1909 Context.insert(InstFakeDef::create(Func, Dest)); 1885 Context.insert(InstFakeDef::create(Func, Dest));
1910 return; 1886 return;
1911 } 1887 }
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
2154 if (isVectorType(DestTy)) { 2130 if (isVectorType(DestTy)) {
2155 assert(DestTy == IceType_v4i32 && 2131 assert(DestTy == IceType_v4i32 &&
2156 Inst->getSrc(0)->getType() == IceType_v4f32); 2132 Inst->getSrc(0)->getType() == IceType_v4f32);
2157 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2158 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2134 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2159 Src0RM = legalizeToReg(Src0RM); 2135 Src0RM = legalizeToReg(Src0RM);
2160 Variable *T = makeReg(DestTy); 2136 Variable *T = makeReg(DestTy);
2161 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2137 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2162 _movp(Dest, T); 2138 _movp(Dest, T);
2163 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { 2139 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2164 constexpr SizeT MaxSrcs = 1; 2140 llvm::report_fatal_error("Helper call was expected");
2165 Type SrcType = Inst->getSrc(0)->getType();
2166 InstCall *Call =
2167 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2168 : H_fptosi_f64_i64,
2169 Dest, MaxSrcs);
2170 Call->addArg(Inst->getSrc(0));
2171 lowerCall(Call);
2172 } else { 2141 } else {
2173 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2142 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2174 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2143 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2175 Variable *T_1 = nullptr; 2144 Variable *T_1 = nullptr;
2176 if (Traits::Is64Bit && DestTy == IceType_i64) { 2145 if (Traits::Is64Bit && DestTy == IceType_i64) {
2177 T_1 = makeReg(IceType_i64); 2146 T_1 = makeReg(IceType_i64);
2178 } else { 2147 } else {
2179 assert(DestTy != IceType_i64); 2148 assert(DestTy != IceType_i64);
2180 T_1 = makeReg(IceType_i32); 2149 T_1 = makeReg(IceType_i32);
2181 } 2150 }
2182 // cvt() requires its integer argument to be a GPR. 2151 // cvt() requires its integer argument to be a GPR.
2183 Variable *T_2 = makeReg(DestTy); 2152 Variable *T_2 = makeReg(DestTy);
2184 if (isByteSizedType(DestTy)) { 2153 if (isByteSizedType(DestTy)) {
2185 assert(T_1->getType() == IceType_i32); 2154 assert(T_1->getType() == IceType_i32);
2186 T_1->setRegClass(RCX86_Is32To8); 2155 T_1->setRegClass(RCX86_Is32To8);
2187 T_2->setRegClass(RCX86_IsTrunc8Rcvr); 2156 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2188 } 2157 }
2189 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2158 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2159 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2191 if (DestTy == IceType_i1) 2160 if (DestTy == IceType_i1)
2192 _and(T_2, Ctx->getConstantInt1(1)); 2161 _and(T_2, Ctx->getConstantInt1(1));
2193 _mov(Dest, T_2); 2162 _mov(Dest, T_2);
2194 } 2163 }
2195 break; 2164 break;
2196 case InstCast::Fptoui: 2165 case InstCast::Fptoui:
2197 if (isVectorType(DestTy)) { 2166 if (isVectorType(DestTy)) {
2198 assert(DestTy == IceType_v4i32 && 2167 llvm::report_fatal_error("Helper call was expected");
2199 Inst->getSrc(0)->getType() == IceType_v4f32);
2200 constexpr SizeT MaxSrcs = 1;
2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2202 Call->addArg(Inst->getSrc(0));
2203 lowerCall(Call);
2204 } else if (DestTy == IceType_i64 || 2168 } else if (DestTy == IceType_i64 ||
2205 (!Traits::Is64Bit && DestTy == IceType_i32)) { 2169 (!Traits::Is64Bit && DestTy == IceType_i32)) {
2206 // Use a helper for both x86-32 and x86-64. 2170 llvm::report_fatal_error("Helper call was expected");
2207 constexpr SizeT MaxSrcs = 1;
2208 Type SrcType = Inst->getSrc(0)->getType();
2209 IceString TargetString;
2210 if (Traits::Is64Bit) {
2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2212 : H_fptoui_f64_i64;
2213 } else if (isInt32Asserting32Or64(DestTy)) {
2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2215 : H_fptoui_f64_i32;
2216 } else {
2217 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2218 : H_fptoui_f64_i64;
2219 }
2220 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2221 Call->addArg(Inst->getSrc(0));
2222 lowerCall(Call);
2223 return;
2224 } else { 2171 } else {
2225 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2172 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2226 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2173 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2227 assert(DestTy != IceType_i64); 2174 assert(DestTy != IceType_i64);
2228 Variable *T_1 = nullptr; 2175 Variable *T_1 = nullptr;
2229 if (Traits::Is64Bit && DestTy == IceType_i32) { 2176 if (Traits::Is64Bit && DestTy == IceType_i32) {
2230 T_1 = makeReg(IceType_i64); 2177 T_1 = makeReg(IceType_i64);
2231 } else { 2178 } else {
2232 assert(DestTy != IceType_i32); 2179 assert(DestTy != IceType_i32);
2233 T_1 = makeReg(IceType_i32); 2180 T_1 = makeReg(IceType_i32);
(...skipping 15 matching lines...) Expand all
2249 if (isVectorType(DestTy)) { 2196 if (isVectorType(DestTy)) {
2250 assert(DestTy == IceType_v4f32 && 2197 assert(DestTy == IceType_v4f32 &&
2251 Inst->getSrc(0)->getType() == IceType_v4i32); 2198 Inst->getSrc(0)->getType() == IceType_v4i32);
2252 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2199 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2253 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2200 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2254 Src0RM = legalizeToReg(Src0RM); 2201 Src0RM = legalizeToReg(Src0RM);
2255 Variable *T = makeReg(DestTy); 2202 Variable *T = makeReg(DestTy);
2256 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2203 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2257 _movp(Dest, T); 2204 _movp(Dest, T);
2258 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2205 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2259 // Use a helper for x86-32. 2206 llvm::report_fatal_error("Helper call was expected");
2260 constexpr SizeT MaxSrcs = 1;
2261 InstCall *Call =
2262 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
2263 : H_sitofp_i64_f64,
2264 Dest, MaxSrcs);
2265 // TODO: Call the correct compiler-rt helper function.
2266 Call->addArg(Inst->getSrc(0));
2267 lowerCall(Call);
2268 return;
2269 } else { 2207 } else {
2270 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2208 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2271 // Sign-extend the operand. 2209 // Sign-extend the operand.
2272 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2210 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2273 Variable *T_1 = nullptr; 2211 Variable *T_1 = nullptr;
2274 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { 2212 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2275 T_1 = makeReg(IceType_i64); 2213 T_1 = makeReg(IceType_i64);
2276 } else { 2214 } else {
2277 assert(Src0RM->getType() != IceType_i64); 2215 assert(Src0RM->getType() != IceType_i64);
2278 T_1 = makeReg(IceType_i32); 2216 T_1 = makeReg(IceType_i32);
2279 } 2217 }
2280 Variable *T_2 = makeReg(DestTy); 2218 Variable *T_2 = makeReg(DestTy);
2281 if (Src0RM->getType() == T_1->getType()) 2219 if (Src0RM->getType() == T_1->getType())
2282 _mov(T_1, Src0RM); 2220 _mov(T_1, Src0RM);
2283 else 2221 else
2284 _movsx(T_1, Src0RM); 2222 _movsx(T_1, Src0RM);
2285 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2223 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2286 _mov(Dest, T_2); 2224 _mov(Dest, T_2);
2287 } 2225 }
2288 break; 2226 break;
2289 case InstCast::Uitofp: { 2227 case InstCast::Uitofp: {
2290 Operand *Src0 = Inst->getSrc(0); 2228 Operand *Src0 = Inst->getSrc(0);
2291 if (isVectorType(Src0->getType())) { 2229 if (isVectorType(Src0->getType())) {
2292 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32); 2230 llvm::report_fatal_error("Helper call was expected");
2293 constexpr SizeT MaxSrcs = 1;
2294 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2295 Call->addArg(Src0);
2296 lowerCall(Call);
2297 } else if (Src0->getType() == IceType_i64 || 2231 } else if (Src0->getType() == IceType_i64 ||
2298 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2232 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2299 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2233 llvm::report_fatal_error("Helper call was expected");
2300 // x86-32.
2301 constexpr SizeT MaxSrcs = 1;
2302 IceString TargetString;
2303 if (isInt32Asserting32Or64(Src0->getType())) {
2304 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
2305 : H_uitofp_i32_f64;
2306 } else {
2307 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
2308 : H_uitofp_i64_f64;
2309 }
2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2311 Call->addArg(Src0);
2312 lowerCall(Call);
2313 return;
2314 } else { 2234 } else {
2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2235 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2316 // Zero-extend the operand. 2236 // Zero-extend the operand.
2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2237 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2318 Variable *T_1 = nullptr; 2238 Variable *T_1 = nullptr;
2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { 2239 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2320 T_1 = makeReg(IceType_i64); 2240 T_1 = makeReg(IceType_i64);
2321 } else { 2241 } else {
2322 assert(Src0RM->getType() != IceType_i64); 2242 assert(Src0RM->getType() != IceType_i64);
2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); 2243 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
(...skipping 13 matching lines...) Expand all
2337 Operand *Src0 = Inst->getSrc(0); 2257 Operand *Src0 = Inst->getSrc(0);
2338 if (DestTy == Src0->getType()) { 2258 if (DestTy == Src0->getType()) {
2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2259 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2340 lowerAssign(Assign); 2260 lowerAssign(Assign);
2341 return; 2261 return;
2342 } 2262 }
2343 switch (DestTy) { 2263 switch (DestTy) {
2344 default: 2264 default:
2345 llvm_unreachable("Unexpected Bitcast dest type"); 2265 llvm_unreachable("Unexpected Bitcast dest type");
2346 case IceType_i8: { 2266 case IceType_i8: {
2347 assert(Src0->getType() == IceType_v8i1); 2267 llvm::report_fatal_error("Helper call was expected");
2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
2349 Call->addArg(Src0);
2350 lowerCall(Call);
2351 } break; 2268 } break;
2352 case IceType_i16: { 2269 case IceType_i16: {
2353 assert(Src0->getType() == IceType_v16i1); 2270 llvm::report_fatal_error("Helper call was expected");
2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
2355 Call->addArg(Src0);
2356 lowerCall(Call);
2357 } break; 2271 } break;
2358 case IceType_i32: 2272 case IceType_i32:
2359 case IceType_f32: { 2273 case IceType_f32: {
2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2274 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2361 Type SrcType = Src0RM->getType(); 2275 Type SrcType = Src0RM->getType();
2362 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || 2276 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2363 (DestTy == IceType_f32 && SrcType == IceType_i32)); 2277 (DestTy == IceType_f32 && SrcType == IceType_i32));
2364 // a.i32 = bitcast b.f32 ==> 2278 // a.i32 = bitcast b.f32 ==>
2365 // t.f32 = b.f32 2279 // t.f32 = b.f32
2366 // s.f32 = spill t.f32 2280 // s.f32 = spill t.f32
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
2462 // SpillLo is considered a "use" of Spill so define Spill before it is 2376 // SpillLo is considered a "use" of Spill so define Spill before it is
2463 // used. 2377 // used.
2464 Context.insert(InstFakeDef::create(Func, Spill)); 2378 Context.insert(InstFakeDef::create(Func, Spill));
2465 _store(T_Lo, SpillLo); 2379 _store(T_Lo, SpillLo);
2466 _mov(T_Hi, hiOperand(Src0)); 2380 _mov(T_Hi, hiOperand(Src0));
2467 _store(T_Hi, SpillHi); 2381 _store(T_Hi, SpillHi);
2468 _movq(Dest, Spill); 2382 _movq(Dest, Spill);
2469 } 2383 }
2470 } break; 2384 } break;
2471 case IceType_v8i1: { 2385 case IceType_v8i1: {
2472 assert(Src0->getType() == IceType_i8); 2386 llvm::report_fatal_error("Helper call was expected");
2473 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
2474 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2475 // Arguments to functions are required to be at least 32 bits wide.
2476 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2477 Call->addArg(Src0AsI32);
2478 lowerCall(Call);
2479 } break; 2387 } break;
2480 case IceType_v16i1: { 2388 case IceType_v16i1: {
2481 assert(Src0->getType() == IceType_i16); 2389 llvm::report_fatal_error("Helper call was expected");
2482 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
2483 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
2484 // Arguments to functions are required to be at least 32 bits wide.
2485 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
2486 Call->addArg(Src0AsI32);
2487 lowerCall(Call);
2488 } break; 2390 } break;
2489 case IceType_v8i16: 2391 case IceType_v8i16:
2490 case IceType_v16i8: 2392 case IceType_v16i8:
2491 case IceType_v4i32: 2393 case IceType_v4i32:
2492 case IceType_v4f32: { 2394 case IceType_v4f32: {
2493 _movp(Dest, legalizeToReg(Src0)); 2395 _movp(Dest, legalizeToReg(Src0));
2494 } break; 2396 } break;
2495 } 2397 }
2496 break; 2398 break;
2497 } 2399 }
(...skipping 2661 matching lines...) Expand 10 before | Expand all | Expand 10 after
5159 Type Ty = Dest->getType(); 5061 Type Ty = Dest->getType();
5160 Type ElementTy = typeElementType(Ty); 5062 Type ElementTy = typeElementType(Ty);
5161 SizeT NumElements = typeNumElements(Ty); 5063 SizeT NumElements = typeNumElements(Ty);
5162 5064
5163 Operand *T = Ctx->getConstantUndef(Ty); 5065 Operand *T = Ctx->getConstantUndef(Ty);
5164 for (SizeT I = 0; I < NumElements; ++I) { 5066 for (SizeT I = 0; I < NumElements; ++I) {
5165 Constant *Index = Ctx->getConstantInt32(I); 5067 Constant *Index = Ctx->getConstantInt32(I);
5166 5068
5167 // Extract the next two inputs. 5069 // Extract the next two inputs.
5168 Variable *Op0 = Func->makeVariable(ElementTy); 5070 Variable *Op0 = Func->makeVariable(ElementTy);
5169 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 5071 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index));
5170 Variable *Op1 = Func->makeVariable(ElementTy); 5072 Variable *Op1 = Func->makeVariable(ElementTy);
5171 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 5073 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index));
5172 5074
5173 // Perform the arithmetic as a scalar operation. 5075 // Perform the arithmetic as a scalar operation.
5174 Variable *Res = Func->makeVariable(ElementTy); 5076 Variable *Res = Func->makeVariable(ElementTy);
5175 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 5077 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1);
5078 Context.insert(Arith);
5079 // We might have created an operation that needed a helper call.
5080 genTargetHelperCallFor(Arith);
5176 5081
5177 // Insert the result into position. 5082 // Insert the result into position.
5178 Variable *DestT = Func->makeVariable(Ty); 5083 Variable *DestT = Func->makeVariable(Ty);
5179 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 5084 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index));
5180 T = DestT; 5085 T = DestT;
5181 } 5086 }
5182 5087
5183 lowerAssign(InstAssign::create(Func, Dest, T)); 5088 Context.insert(InstAssign::create(Func, Dest, T));
5184 } 5089 }
5185 5090
5186 /// The following pattern occurs often in lowered C and C++ code: 5091 /// The following pattern occurs often in lowered C and C++ code:
5187 /// 5092 ///
5188 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 5093 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
5189 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> 5094 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
5190 /// 5095 ///
5191 /// We can eliminate the sext operation by copying the result of pcmpeqd, 5096 /// We can eliminate the sext operation by copying the result of pcmpeqd,
5192 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the 5097 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
5193 /// sext operation. 5098 /// sext operation.
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
5316 } 5221 }
5317 5222
5318 // Pause constant blinding or pooling, blinding or pooling will be done later 5223 // Pause constant blinding or pooling, blinding or pooling will be done later
5319 // during phi lowering assignments 5224 // during phi lowering assignments
5320 BoolFlagSaver B(RandomizationPoolingPaused, true); 5225 BoolFlagSaver B(RandomizationPoolingPaused, true);
5321 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 5226 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
5322 this, Context.getNode(), Func); 5227 this, Context.getNode(), Func);
5323 } 5228 }
5324 5229
5325 template <class Machine> 5230 template <class Machine>
5326 uint32_t 5231 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) {
5327 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) { 5232 uint32_t StackArgumentsSize = 0;
5233 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5234 const char *HelperName = nullptr;
5235 Variable *Dest = Arith->getDest();
5236 Type DestTy = Dest->getType();
5237 if (!Traits::Is64Bit && DestTy == IceType_i64) {
5238 switch (Arith->getOp()) {
5239 default:
5240 return;
5241 case InstArithmetic::Udiv:
5242 HelperName = H_udiv_i64;
5243 break;
5244 case InstArithmetic::Sdiv:
5245 HelperName = H_sdiv_i64;
5246 break;
5247 case InstArithmetic::Urem:
5248 HelperName = H_urem_i64;
5249 break;
5250 case InstArithmetic::Srem:
5251 HelperName = H_srem_i64;
5252 break;
5253 }
5254 } else if (isVectorType(DestTy)) {
5255 Variable *Dest = Arith->getDest();
5256 Operand *Src0 = Arith->getSrc(0);
5257 Operand *Src1 = Arith->getSrc(1);
5258 switch (Arith->getOp()) {
5259 default:
5260 return;
5261 case InstArithmetic::Mul:
5262 if (DestTy == IceType_v16i8) {
5263 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
5264 Arith->setDeleted();
5265 }
5266 return;
5267 case InstArithmetic::Shl:
5268 case InstArithmetic::Lshr:
5269 case InstArithmetic::Ashr:
5270 case InstArithmetic::Udiv:
5271 case InstArithmetic::Urem:
5272 case InstArithmetic::Sdiv:
5273 case InstArithmetic::Srem:
5274 case InstArithmetic::Frem:
5275 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);
5276 Arith->setDeleted();
5277 return;
5278 }
5279 } else {
5280 switch (Arith->getOp()) {
5281 default:
5282 return;
5283 case InstArithmetic::Frem:
5284 if (isFloat32Asserting32Or64(DestTy))
5285 HelperName = H_frem_f32;
5286 else
5287 HelperName = H_frem_f64;
5288 }
5289 }
5290 constexpr SizeT MaxSrcs = 2;
5291 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
5292 Call->addArg(Arith->getSrc(0));
5293 Call->addArg(Arith->getSrc(1));
5294 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5295 Context.insert(Call);
5296 Arith->setDeleted();
5297 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
5298 InstCast::OpKind CastKind = Cast->getCastKind();
5299 Operand *Src0 = Cast->getSrc(0);
5300 const Type SrcType = Src0->getType();
5301 Variable *Dest = Cast->getDest();
5302 const Type DestTy = Dest->getType();
5303 const char *HelperName = nullptr;
5304 switch (CastKind) {
5305 default:
5306 return;
5307 case InstCast::Fptosi:
5308 if (!Traits::Is64Bit && DestTy == IceType_i64) {
5309 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
5310 : H_fptosi_f64_i64;
5311 } else {
5312 return;
5313 }
5314 break;
5315 case InstCast::Fptoui:
5316 if (isVectorType(DestTy)) {
5317 assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
5318 HelperName = H_fptoui_4xi32_f32;
5319 } else if (DestTy == IceType_i64 ||
5320 (!Traits::Is64Bit && DestTy == IceType_i32)) {
5321 if (Traits::Is64Bit) {
5322 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
5323 : H_fptoui_f64_i64;
5324 } else if (isInt32Asserting32Or64(DestTy)) {
5325 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
5326 : H_fptoui_f64_i32;
5327 } else {
5328 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
5329 : H_fptoui_f64_i64;
5330 }
5331 } else {
5332 return;
5333 }
5334 break;
5335 case InstCast::Sitofp:
5336 if (!Traits::Is64Bit && SrcType == IceType_i64) {
5337 HelperName = isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
5338 : H_sitofp_i64_f64;
5339 } else {
5340 return;
5341 }
5342 break;
5343 case InstCast::Uitofp:
5344 if (isVectorType(SrcType)) {
5345 assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
5346 HelperName = H_uitofp_4xi32_4xf32;
5347 } else if (SrcType == IceType_i64 ||
5348 (!Traits::Is64Bit && SrcType == IceType_i32)) {
5349 if (isInt32Asserting32Or64(SrcType)) {
5350 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
5351 : H_uitofp_i32_f64;
5352 } else {
5353 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
5354 : H_uitofp_i64_f64;
5355 }
5356 } else {
5357 return;
5358 }
5359 break;
5360 case InstCast::Bitcast: {
5361 if (DestTy == Src0->getType())
5362 return;
5363 switch (DestTy) {
5364 default:
5365 return;
5366 case IceType_i8:
5367 assert(Src0->getType() == IceType_v8i1);
5368 HelperName = H_bitcast_8xi1_i8;
5369 break;
5370 case IceType_i16:
5371 assert(Src0->getType() == IceType_v16i1);
5372 HelperName = H_bitcast_16xi1_i16;
5373 break;
5374 case IceType_v8i1: {
5375 assert(Src0->getType() == IceType_i8);
5376 HelperName = H_bitcast_i8_8xi1;
5377 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
5378 // Arguments to functions are required to be at least 32 bits wide.
5379 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
5380 Src0 = Src0AsI32;
5381 } break;
5382 case IceType_v16i1: {
5383 assert(Src0->getType() == IceType_i16);
5384 HelperName = H_bitcast_i16_16xi1;
5385 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
5386 // Arguments to functions are required to be at least 32 bits wide.
5387 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
5388 Src0 = Src0AsI32;
5389 } break;
5390 }
5391 } break;
5392 }
5393 constexpr SizeT MaxSrcs = 1;
5394 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
5395 Call->addArg(Src0);
5396 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5397 Context.insert(Call);
5398 Cast->setDeleted();
5399 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {
5400 std::vector<Type> ArgTypes;
5401 Type ReturnType = IceType_void;
5402 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {
5403 default:
5404 return;
5405 case Intrinsics::Ctpop: {
5406 Operand *Val = Intrinsic->getArg(0);
5407 Type ValTy = Val->getType();
5408 if (ValTy == IceType_i64)
5409 ArgTypes = {IceType_i64};
5410 else
5411 ArgTypes = {IceType_i32};
5412 ReturnType = IceType_i32;
5413 } break;
5414 case Intrinsics::Longjmp:
5415 ArgTypes = {IceType_i32, IceType_i32};
5416 ReturnType = IceType_void;
5417 break;
5418 case Intrinsics::Memcpy:
5419 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5420 ReturnType = IceType_void;
5421 break;
5422 case Intrinsics::Memmove:
5423 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5424 ReturnType = IceType_void;
5425 break;
5426 case Intrinsics::Memset:
5427 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};
5428 ReturnType = IceType_void;
5429 break;
5430 case Intrinsics::NaClReadTP:
5431 ReturnType = IceType_i32;
5432 break;
5433 case Intrinsics::Setjmp:
5434 ArgTypes = {IceType_i32};
5435 ReturnType = IceType_i32;
5436 break;
5437 }
5438 StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
5439 } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {
5440 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);
5441 } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {
5442 if (!Ret->hasRetValue())
5443 return;
5444 Operand *RetValue = Ret->getRetValue();
5445 Type ReturnType = RetValue->getType();
5446 if (!isScalarFloatingType(ReturnType))
5447 return;
5448 StackArgumentsSize = typeWidthInBytes(ReturnType);
5449 } else {
5450 return;
5451 }
5452 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);
5453 updateMaxOutArgsSizeBytes(StackArgumentsSize);
5454 }
5455
5456 template <class Machine>
5457 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(
5458 const std::vector<Type> &ArgTypes, Type ReturnType) {
5328 uint32_t OutArgumentsSizeBytes = 0; 5459 uint32_t OutArgumentsSizeBytes = 0;
5329 uint32_t XmmArgCount = 0; 5460 uint32_t XmmArgCount = 0;
5330 uint32_t GprArgCount = 0; 5461 uint32_t GprArgCount = 0;
5331 // Classify each argument operand according to the location where the 5462 for (Type Ty : ArgTypes) {
5332 // argument is passed.
5333 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
5334 Operand *Arg = Instr->getArg(i);
5335 Type Ty = Arg->getType();
5336 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 5463 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
5337 assert(typeWidthInBytes(Ty) >= 4); 5464 assert(typeWidthInBytes(Ty) >= 4);
5338 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) { 5465 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {
5339 ++XmmArgCount; 5466 ++XmmArgCount;
5340 } else if (isScalarIntegerType(Ty) && 5467 } else if (isScalarIntegerType(Ty) &&
5341 GprArgCount < Traits::X86_MAX_GPR_ARGS) { 5468 GprArgCount < Traits::X86_MAX_GPR_ARGS) {
5342 // The 64 bit ABI allows some integers to be passed in GPRs. 5469 // The 64 bit ABI allows some integers to be passed in GPRs.
5343 ++GprArgCount; 5470 ++GprArgCount;
5344 } else { 5471 } else {
5345 if (isVectorType(Arg->getType())) { 5472 if (isVectorType(Ty)) {
5346 OutArgumentsSizeBytes = 5473 OutArgumentsSizeBytes =
5347 Traits::applyStackAlignment(OutArgumentsSizeBytes); 5474 Traits::applyStackAlignment(OutArgumentsSizeBytes);
5348 } 5475 }
5349 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 5476 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);
5350 } 5477 }
5351 } 5478 }
5352 if (Traits::Is64Bit) 5479 if (Traits::Is64Bit)
5353 return OutArgumentsSizeBytes; 5480 return OutArgumentsSizeBytes;
5354 // The 32 bit ABI requires floating point values to be returned on the x87 FP 5481 // The 32 bit ABI requires floating point values to be returned on the x87 FP
5355 // stack. Ensure there is enough space for the fstp/movs for floating returns. 5482 // stack. Ensure there is enough space for the fstp/movs for floating returns.
5356 Variable *Dest = Instr->getDest(); 5483 if (isScalarFloatingType(ReturnType)) {
5357 if (Dest == nullptr)
5358 return OutArgumentsSizeBytes;
5359 const Type DestType = Dest->getType();
5360 if (isScalarFloatingType(Dest->getType())) {
5361 OutArgumentsSizeBytes = 5484 OutArgumentsSizeBytes =
5362 std::max(OutArgumentsSizeBytes, 5485 std::max(OutArgumentsSizeBytes,
5363 static_cast<uint32_t>(typeWidthInBytesOnStack(DestType))); 5486 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));
5364 } 5487 }
5365 return OutArgumentsSizeBytes; 5488 return OutArgumentsSizeBytes;
5366 } 5489 }
5367 5490
5368 template <class Machine> 5491 template <class Machine>
5492 uint32_t
5493 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) {
5494 // Build a vector of the arguments' types.
5495 std::vector<Type> ArgTypes;
5496 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
5497 Operand *Arg = Instr->getArg(i);
5498 ArgTypes.emplace_back(Arg->getType());
5499 }
5500 // Compute the return type (if any);
5501 Type ReturnType = IceType_void;
5502 Variable *Dest = Instr->getDest();
5503 if (Dest != nullptr)
5504 ReturnType = Dest->getType();
5505 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);
5506 }
5507
5508 template <class Machine>
5369 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) { 5509 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {
5370 Variable *Reg = makeReg(Ty, RegNum); 5510 Variable *Reg = makeReg(Ty, RegNum);
5371 switch (Ty) { 5511 switch (Ty) {
5372 case IceType_i1: 5512 case IceType_i1:
5373 case IceType_i8: 5513 case IceType_i8:
5374 case IceType_i16: 5514 case IceType_i16:
5375 case IceType_i32: 5515 case IceType_i32:
5376 case IceType_i64: 5516 case IceType_i64:
5377 // Conservatively do "mov reg, 0" to avoid modifying FLAGS. 5517 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.
5378 _mov(Reg, Ctx->getConstantZero(Ty)); 5518 _mov(Reg, Ctx->getConstantZero(Ty));
(...skipping 687 matching lines...) Expand 10 before | Expand all | Expand 10 after
6066 } 6206 }
6067 // the offset is not eligible for blinding or pooling, return the original 6207 // the offset is not eligible for blinding or pooling, return the original
6068 // mem operand 6208 // mem operand
6069 return MemOperand; 6209 return MemOperand;
6070 } 6210 }
6071 6211
6072 } // end of namespace X86Internal 6212 } // end of namespace X86Internal
6073 } // end of namespace Ice 6213 } // end of namespace Ice
6074 6214
6075 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6215 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/assembler/x86/sandboxing.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698