src/IceTargetLoweringX86BaseImpl.h - Issue 1472623002: Unify alloca, outgoing arg, and prolog construction

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1472623002: Unify alloca, outgoing arg, and prolog construction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Code review fixes. Also removed StackAdjustment. Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 285 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
296 }	296 }

297	297

298 template <class Machine> void TargetX86Base<Machine>::staticInit() {	298 template <class Machine> void TargetX86Base<Machine>::staticInit() {

299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);	299 Traits::initRegisterSet(&TypeToRegisterSet, &RegisterAliases, &ScratchRegs);

300 }	300 }

301	301

302 template <class Machine> void TargetX86Base<Machine>::translateO2() {	302 template <class Machine> void TargetX86Base<Machine>::translateO2() {

303 TimerMarker T(TimerStack::TT_O2, Func);	303 TimerMarker T(TimerStack::TT_O2, Func);

304	304

305 genTargetHelperCalls();	305 genTargetHelperCalls();

	306 Func->dump("After target helper call insertion");

306	307

307 // Merge Alloca instructions, and lay out the stack.	308 // Merge Alloca instructions, and lay out the stack.

308 static constexpr bool SortAndCombineAllocas = true;	309 static constexpr bool SortAndCombineAllocas = true;

309 Func->processAllocas(SortAndCombineAllocas);	310 Func->processAllocas(SortAndCombineAllocas);

310 Func->dump("After Alloca processing");	311 Func->dump("After Alloca processing");

311	312

312 if (!Ctx->getFlags().getPhiEdgeSplit()) {	313 if (!Ctx->getFlags().getPhiEdgeSplit()) {

313 // Lower Phi instructions.	314 // Lower Phi instructions.

314 Func->placePhiLoads();	315 Func->placePhiLoads();

315 if (Func->hasError())	316 if (Func->hasError())

(...skipping 454 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
770 return;	771 return;

771 Ostream &Str = Ctx->getStrEmit();	772 Ostream &Str = Ctx->getStrEmit();

772 if (Var->hasReg()) {	773 if (Var->hasReg()) {

773 Str << "%" << getRegName(Var->getRegNum(), Var->getType());	774 Str << "%" << getRegName(Var->getRegNum(), Var->getType());

774 return;	775 return;

775 }	776 }

776 if (Var->mustHaveReg()) {	777 if (Var->mustHaveReg()) {

777 llvm_unreachable("Infinite-weight Variable has no register assigned");	778 llvm_unreachable("Infinite-weight Variable has no register assigned");

778 }	779 }

779 const int32_t Offset = Var->getStackOffset();	780 const int32_t Offset = Var->getStackOffset();

780 int32_t OffsetAdj = 0;

781 int32_t BaseRegNum = Var->getBaseRegNum();	781 int32_t BaseRegNum = Var->getBaseRegNum();

782 if (BaseRegNum == Variable::NoRegister) {	782 if (BaseRegNum == Variable::NoRegister)

783 BaseRegNum = getFrameOrStackReg();	783 BaseRegNum = getFrameOrStackReg();

784 if (!hasFramePointer())	784 // Print in the form "Offset(%reg)", taking care that:

785 OffsetAdj = getStackAdjustment();

786 }

787 // Print in the form "OffsetAdj+Offset(%reg)", taking care that:

788 // - OffsetAdj may be 0

789 // - Offset is never printed when it is 0	785 // - Offset is never printed when it is 0

790 // - Offset may be positive or symbolic, so a "+" might be needed

791	786

792 // Only print nonzero OffsetAdj.

793 if (OffsetAdj) {

794 Str << OffsetAdj;

795 }

796 const bool DecorateAsm = Func->getContext()->getFlags().getDecorateAsm();	787 const bool DecorateAsm = Func->getContext()->getFlags().getDecorateAsm();

797 // Only print Offset when it is nonzero, regardless of DecorateAsm.	788 // Only print Offset when it is nonzero, regardless of DecorateAsm.

798 if (Offset) {	789 if (Offset) {

799 if (OffsetAdj && (DecorateAsm || Offset > 0)) {

800 Str << "+";

801 }

802 if (DecorateAsm) {	790 if (DecorateAsm) {

803 Str << Var->getSymbolicStackOffset(Func);	791 Str << Var->getSymbolicStackOffset(Func);

804 } else {	792 } else {

805 Str << Offset;	793 Str << Offset;

806 }	794 }

807 }	795 }

808 const Type FrameSPTy = Traits::WordType;	796 const Type FrameSPTy = Traits::WordType;

809 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";	797 Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";

810 }	798 }

811	799

812 template <class Machine>	800 template <class Machine>

813 typename TargetX86Base<Machine>::Traits::Address	801 typename TargetX86Base<Machine>::Traits::Address

814 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {	802 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {

815 if (Var->hasReg())	803 if (Var->hasReg())

816 llvm_unreachable("Stack Variable has a register assigned");	804 llvm_unreachable("Stack Variable has a register assigned");

817 if (Var->mustHaveReg()) {	805 if (Var->mustHaveReg()) {

818 llvm_unreachable("Infinite-weight Variable has no register assigned");	806 llvm_unreachable("Infinite-weight Variable has no register assigned");

819 }	807 }

820 int32_t Offset = Var->getStackOffset();	808 int32_t Offset = Var->getStackOffset();

821 int32_t BaseRegNum = Var->getBaseRegNum();	809 int32_t BaseRegNum = Var->getBaseRegNum();

822 if (Var->getBaseRegNum() == Variable::NoRegister) {	810 if (Var->getBaseRegNum() == Variable::NoRegister)

823 BaseRegNum = getFrameOrStackReg();	811 BaseRegNum = getFrameOrStackReg();

824 if (!hasFramePointer())

825 Offset += getStackAdjustment();

826 }

827 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,	812 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,

828 AssemblerFixup::NoFixup);	813 AssemblerFixup::NoFixup);

829 }	814 }

830	815

831 /// Helper function for addProlog().	816 /// Helper function for addProlog().

832 ///	817 ///

833 /// This assumes Arg is an argument passed on the stack. This sets the frame	818 /// This assumes Arg is an argument passed on the stack. This sets the frame

834 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an	819 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an

835 /// I64 arg that has been split into Lo and Hi components, it calls itself	820 /// I64 arg that has been split into Lo and Hi components, it calls itself

836 /// recursively on the components, taking care to handle Lo first because of the	821 /// recursively on the components, taking care to handle Lo first because of the

(...skipping 114 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
951	936

952 template <class Machine>	937 template <class Machine>

953 llvm::SmallBitVector	938 llvm::SmallBitVector

954 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,	939 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,

955 RegSetMask Exclude) const {	940 RegSetMask Exclude) const {

956 return Traits::getRegisterSet(Include, Exclude);	941 return Traits::getRegisterSet(Include, Exclude);

957 }	942 }

958	943

959 template <class Machine>	944 template <class Machine>

960 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {	945 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {

961 if (!Inst->getKnownFrameOffset())

962 setHasFramePointer();

963 // Conservatively require the stack to be aligned. Some stack adjustment	946 // Conservatively require the stack to be aligned. Some stack adjustment

964 // operations implemented below assume that the stack is aligned before the	947 // operations implemented below assume that the stack is aligned before the

965 // alloca. All the alloca code ensures that the stack alignment is preserved	948 // alloca. All the alloca code ensures that the stack alignment is preserved

966 // after the alloca. The stack alignment restriction can be relaxed in some	949 // after the alloca. The stack alignment restriction can be relaxed in some

967 // cases.	950 // cases.

968 NeedsStackAlignment = true;	951 NeedsStackAlignment = true;

969	952

970 // TODO(stichnot): minimize the number of adjustments of esp, etc.

971 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

972 Operand *TotalSize = legalize(Inst->getSizeInBytes());

973 Variable *Dest = Inst->getDest();

974 uint32_t AlignmentParam = Inst->getAlignInBytes();

975 // For default align=0, set it to the real value 1, to avoid any	953 // For default align=0, set it to the real value 1, to avoid any

976 // bit-manipulation problems below.	954 // bit-manipulation problems below.

977 AlignmentParam = std::max(AlignmentParam, 1u);	955 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());

978	956

979 // LLVM enforces power of 2 alignment.	957 // LLVM enforces power of 2 alignment.

980 assert(llvm::isPowerOf2_32(AlignmentParam));	958 assert(llvm::isPowerOf2_32(AlignmentParam));

981 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));	959 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

982	960

983 uint32_t Alignment =	961 const uint32_t Alignment =

984 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);	962 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);

985 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {	963 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;

	964 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;

	965 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();

	966 const bool UseFramePointer =

	967 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

	968

	969 if (UseFramePointer)

986 setHasFramePointer();	970 setHasFramePointer();

	971

	972 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

	973 if (OverAligned) {

987 _and(esp, Ctx->getConstantInt32(-Alignment));	974 _and(esp, Ctx->getConstantInt32(-Alignment));

988 }	975 }

	976

	977 Variable *Dest = Inst->getDest();

	978 Operand *TotalSize = legalize(Inst->getSizeInBytes());

	979

989 if (const auto *ConstantTotalSize =	980 if (const auto *ConstantTotalSize =

990 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {	981 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {

991 uint32_t Value = ConstantTotalSize->getValue();	982 const uint32_t Value =

992 Value = Utils::applyAlignment(Value, Alignment);	983 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);

993 if (Inst->getKnownFrameOffset()) {	984 if (!UseFramePointer) {

994 _adjust_stack(Value);	985 // If we don't need a Frame Pointer, this alloca has a known offset to the

	986 // stack pointer. We don't need to adjust the stack pointer, nor assign any

	987 // value to Dest, as Dest is rematerializable.

	988 assert(Dest->isRematerializable());

995 FixedAllocaSizeBytes += Value;	989 FixedAllocaSizeBytes += Value;

	990 Context.insert(InstFakeDef::create(Func, Dest));

996 } else {	991 } else {

997 _sub(esp, Ctx->getConstantInt32(Value));	992 _sub(esp, Ctx->getConstantInt32(Value));

998 }	993 }

999 } else {	994 } else {

1000 // Non-constant sizes need to be adjusted to the next highest multiple of	995 // Non-constant sizes need to be adjusted to the next highest multiple of

1001 // the required alignment at runtime.	996 // the required alignment at runtime.

1002 Variable *T = makeReg(IceType_i32);	997 Variable *T = makeReg(IceType_i32);

1003 _mov(T, TotalSize);	998 _mov(T, TotalSize);

1004 _add(T, Ctx->getConstantInt32(Alignment - 1));	999 _add(T, Ctx->getConstantInt32(Alignment - 1));

1005 _and(T, Ctx->getConstantInt32(-Alignment));	1000 _and(T, Ctx->getConstantInt32(-Alignment));

1006 _sub(esp, T);	1001 _sub(esp, T);

1007 }	1002 }

1008 _mov(Dest, esp);	1003 // Add enough to the returned address to account for the out args area.

	1004 uint32_t OutArgsSize = maxOutArgsSizeBytes();

	1005 if (OutArgsSize > 0) {

	1006 Variable *T = makeReg(IceType_i32);

	1007 typename Traits::X86OperandMem *CalculateOperand =

	1008 Traits::X86OperandMem::create(

	1009 Func, IceType_i32, esp,

	1010 Ctx->getConstantInt(IceType_i32, OutArgsSize));

	1011 _lea(T, CalculateOperand);

	1012 _mov(Dest, T);

	1013 } else {

	1014 _mov(Dest, esp);

	1015 }

1009 }	1016 }

1010	1017

1011 /// Strength-reduce scalar integer multiplication by a constant (for i32 or	1018 /// Strength-reduce scalar integer multiplication by a constant (for i32 or

1012 /// narrower) for certain constants. The lea instruction can be used to multiply	1019 /// narrower) for certain constants. The lea instruction can be used to multiply

1013 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of	1020 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of

1014 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2	1021 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2

1015 /// lea-based multiplies by 5, combined with left-shifting by 2.	1022 /// lea-based multiplies by 5, combined with left-shifting by 2.

1016 template <class Machine>	1023 template <class Machine>

1017 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,	1024 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,

1018 int32_t Src1) {	1025 int32_t Src1) {

(...skipping 329 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1348 (void)SwapCount;	1355 (void)SwapCount;

1349 }	1356 }

1350 if (!Traits::Is64Bit && Ty == IceType_i64) {	1357 if (!Traits::Is64Bit && Ty == IceType_i64) {

1351 // These x86-32 helper-call-involved instructions are lowered in this	1358 // These x86-32 helper-call-involved instructions are lowered in this

1352 // separate switch. This is because loOperand() and hiOperand() may insert	1359 // separate switch. This is because loOperand() and hiOperand() may insert

1353 // redundant instructions for constant blinding and pooling. Such redundant	1360 // redundant instructions for constant blinding and pooling. Such redundant

1354 // instructions will fail liveness analysis under -Om1 setting. And,	1361 // instructions will fail liveness analysis under -Om1 setting. And,

1355 // actually these arguments do not need to be processed with loOperand()	1362 // actually these arguments do not need to be processed with loOperand()

1356 // and hiOperand() to be used.	1363 // and hiOperand() to be used.

1357 switch (Inst->getOp()) {	1364 switch (Inst->getOp()) {

1358 case InstArithmetic::Udiv: {	1365 case InstArithmetic::Udiv:

1359 constexpr SizeT MaxSrcs = 2;	1366 case InstArithmetic::Sdiv:

1360 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);	1367 case InstArithmetic::Urem:

1361 Call->addArg(Inst->getSrc(0));	1368 case InstArithmetic::Srem:

1362 Call->addArg(Inst->getSrc(1));	1369 llvm::report_fatal_error("Helper call was expected");

1363 lowerCall(Call);

1364 return;	1370 return;

1365 }

1366 case InstArithmetic::Sdiv: {

1367 constexpr SizeT MaxSrcs = 2;

1368 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);

1369 Call->addArg(Inst->getSrc(0));

1370 Call->addArg(Inst->getSrc(1));

1371 lowerCall(Call);

1372 return;

1373 }

1374 case InstArithmetic::Urem: {

1375 constexpr SizeT MaxSrcs = 2;

1376 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);

1377 Call->addArg(Inst->getSrc(0));

1378 Call->addArg(Inst->getSrc(1));

1379 lowerCall(Call);

1380 return;

1381 }

1382 case InstArithmetic::Srem: {

1383 constexpr SizeT MaxSrcs = 2;

1384 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);

1385 Call->addArg(Inst->getSrc(0));

1386 Call->addArg(Inst->getSrc(1));

1387 lowerCall(Call);

1388 return;

1389 }

1390 default:	1371 default:

1391 break;	1372 break;

1392 }	1373 }

1393	1374

1394 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	1375 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

1395 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	1376 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

1396 Operand *Src0Lo = loOperand(Src0);	1377 Operand *Src0Lo = loOperand(Src0);

1397 Operand *Src0Hi = hiOperand(Src0);	1378 Operand *Src0Hi = hiOperand(Src0);

1398 Operand *Src1Lo = loOperand(Src1);	1379 Operand *Src1Lo = loOperand(Src1);

1399 Operand *Src1Hi = hiOperand(Src1);	1380 Operand *Src1Hi = hiOperand(Src1);

(...skipping 174 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1574 Variable *T4 = makeReg(IceType_v4i32);	1555 Variable *T4 = makeReg(IceType_v4i32);

1575 _movp(T1, Src0);	1556 _movp(T1, Src0);

1576 _pshufd(T2, Src0, Mask1030);	1557 _pshufd(T2, Src0, Mask1030);

1577 _pshufd(T3, Src1, Mask1030);	1558 _pshufd(T3, Src1, Mask1030);

1578 _pmuludq(T1, Src1);	1559 _pmuludq(T1, Src1);

1579 _pmuludq(T2, T3);	1560 _pmuludq(T2, T3);

1580 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));	1561 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));

1581 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));	1562 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));

1582 _movp(Dest, T4);	1563 _movp(Dest, T4);

1583 } else if (Ty == IceType_v16i8) {	1564 } else if (Ty == IceType_v16i8) {

1584 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1565 llvm::report_fatal_error("Scalarized operation was expected");

1585 } else {	1566 } else {

1586 llvm::report_fatal_error("Invalid vector multiply type");	1567 llvm::report_fatal_error("Invalid vector multiply type");

1587 }	1568 }

1588 } break;	1569 } break;

1589 case InstArithmetic::Shl:	1570 case InstArithmetic::Shl:

1590 case InstArithmetic::Lshr:	1571 case InstArithmetic::Lshr:

1591 case InstArithmetic::Ashr:	1572 case InstArithmetic::Ashr:

1592 case InstArithmetic::Udiv:	1573 case InstArithmetic::Udiv:

1593 case InstArithmetic::Urem:	1574 case InstArithmetic::Urem:

1594 case InstArithmetic::Sdiv:	1575 case InstArithmetic::Sdiv:

1595 case InstArithmetic::Srem:	1576 case InstArithmetic::Srem:

1596 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1577 llvm::report_fatal_error("Scalarized operation was expected");

1597 break;	1578 break;

1598 case InstArithmetic::Fadd: {	1579 case InstArithmetic::Fadd: {

1599 Variable *T = makeReg(Ty);	1580 Variable *T = makeReg(Ty);

1600 _movp(T, Src0);	1581 _movp(T, Src0);

1601 _addps(T, Src1);	1582 _addps(T, Src1);

1602 _movp(Dest, T);	1583 _movp(Dest, T);

1603 } break;	1584 } break;

1604 case InstArithmetic::Fsub: {	1585 case InstArithmetic::Fsub: {

1605 Variable *T = makeReg(Ty);	1586 Variable *T = makeReg(Ty);

1606 _movp(T, Src0);	1587 _movp(T, Src0);

1607 _subps(T, Src1);	1588 _subps(T, Src1);

1608 _movp(Dest, T);	1589 _movp(Dest, T);

1609 } break;	1590 } break;

1610 case InstArithmetic::Fmul: {	1591 case InstArithmetic::Fmul: {

1611 Variable *T = makeReg(Ty);	1592 Variable *T = makeReg(Ty);

1612 _movp(T, Src0);	1593 _movp(T, Src0);

1613 _mulps(T, Src0 == Src1 ? T : Src1);	1594 _mulps(T, Src0 == Src1 ? T : Src1);

1614 _movp(Dest, T);	1595 _movp(Dest, T);

1615 } break;	1596 } break;

1616 case InstArithmetic::Fdiv: {	1597 case InstArithmetic::Fdiv: {

1617 Variable *T = makeReg(Ty);	1598 Variable *T = makeReg(Ty);

1618 _movp(T, Src0);	1599 _movp(T, Src0);

1619 _divps(T, Src1);	1600 _divps(T, Src1);

1620 _movp(Dest, T);	1601 _movp(Dest, T);

1621 } break;	1602 } break;

1622 case InstArithmetic::Frem:	1603 case InstArithmetic::Frem:

1623 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1604 llvm::report_fatal_error("Scalarized operation was expected");

1624 break;	1605 break;

1625 }	1606 }

1626 return;	1607 return;

1627 }	1608 }

1628 Variable *T_edx = nullptr;	1609 Variable *T_edx = nullptr;

1629 Variable *T = nullptr;	1610 Variable *T = nullptr;

1630 switch (Inst->getOp()) {	1611 switch (Inst->getOp()) {

1631 case InstArithmetic::_num:	1612 case InstArithmetic::_num:

1632 llvm_unreachable("Unknown arithmetic operator");	1613 llvm_unreachable("Unknown arithmetic operator");

1633 break;	1614 break;

(...skipping 250 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1884 case InstArithmetic::Fmul:	1865 case InstArithmetic::Fmul:

1885 _mov(T, Src0);	1866 _mov(T, Src0);

1886 _mulss(T, Src0 == Src1 ? T : Src1);	1867 _mulss(T, Src0 == Src1 ? T : Src1);

1887 _mov(Dest, T);	1868 _mov(Dest, T);

1888 break;	1869 break;

1889 case InstArithmetic::Fdiv:	1870 case InstArithmetic::Fdiv:

1890 _mov(T, Src0);	1871 _mov(T, Src0);

1891 _divss(T, Src1);	1872 _divss(T, Src1);

1892 _mov(Dest, T);	1873 _mov(Dest, T);

1893 break;	1874 break;

1894 case InstArithmetic::Frem: {	1875 case InstArithmetic::Frem:

1895 constexpr SizeT MaxSrcs = 2;	1876 llvm::report_fatal_error("Helper call was expected");

1896 InstCall *Call = makeHelperCall(	1877 break;

1897 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);

1898 Call->addArg(Src0);

1899 Call->addArg(Src1);

1900 return lowerCall(Call);

1901 }

1902 }	1878 }

1903 }	1879 }

1904	1880

1905 template <class Machine>	1881 template <class Machine>

1906 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {	1882 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {

1907 Variable *Dest = Inst->getDest();	1883 Variable *Dest = Inst->getDest();

1908 if (Dest->isRematerializable()) {	1884 if (Dest->isRematerializable()) {

1909 Context.insert(InstFakeDef::create(Func, Dest));	1885 Context.insert(InstFakeDef::create(Func, Dest));

1910 return;	1886 return;

1911 }	1887 }

(...skipping 242 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2154 if (isVectorType(DestTy)) {	2130 if (isVectorType(DestTy)) {

2155 assert(DestTy == IceType_v4i32 &&	2131 assert(DestTy == IceType_v4i32 &&

2156 Inst->getSrc(0)->getType() == IceType_v4f32);	2132 Inst->getSrc(0)->getType() == IceType_v4f32);

2157 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);	2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

2158 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))	2134 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))

2159 Src0RM = legalizeToReg(Src0RM);	2135 Src0RM = legalizeToReg(Src0RM);

2160 Variable *T = makeReg(DestTy);	2136 Variable *T = makeReg(DestTy);

2161 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);	2137 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);

2162 _movp(Dest, T);	2138 _movp(Dest, T);

2163 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {	2139 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {

2164 constexpr SizeT MaxSrcs = 1;	2140 llvm::report_fatal_error("Helper call was expected");

2165 Type SrcType = Inst->getSrc(0)->getType();

2166 InstCall *Call =

2167 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64

2168 : H_fptosi_f64_i64,

2169 Dest, MaxSrcs);

2170 Call->addArg(Inst->getSrc(0));

2171 lowerCall(Call);

2172 } else {	2141 } else {

2173 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);	2142 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

2174 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type	2143 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

2175 Variable *T_1 = nullptr;	2144 Variable *T_1 = nullptr;

2176 if (Traits::Is64Bit && DestTy == IceType_i64) {	2145 if (Traits::Is64Bit && DestTy == IceType_i64) {

2177 T_1 = makeReg(IceType_i64);	2146 T_1 = makeReg(IceType_i64);

2178 } else {	2147 } else {

2179 assert(DestTy != IceType_i64);	2148 assert(DestTy != IceType_i64);

2180 T_1 = makeReg(IceType_i32);	2149 T_1 = makeReg(IceType_i32);

2181 }	2150 }

2182 // cvt() requires its integer argument to be a GPR.	2151 // cvt() requires its integer argument to be a GPR.

2183 Variable *T_2 = makeReg(DestTy);	2152 Variable *T_2 = makeReg(DestTy);

2184 if (isByteSizedType(DestTy)) {	2153 if (isByteSizedType(DestTy)) {

2185 assert(T_1->getType() == IceType_i32);	2154 assert(T_1->getType() == IceType_i32);

2186 T_1->setRegClass(RCX86_Is32To8);	2155 T_1->setRegClass(RCX86_Is32To8);

2187 T_2->setRegClass(RCX86_IsTrunc8Rcvr);	2156 T_2->setRegClass(RCX86_IsTrunc8Rcvr);

2188 }	2157 }

2189 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);	2158 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);

2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types	2159 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

2191 if (DestTy == IceType_i1)	2160 if (DestTy == IceType_i1)

2192 _and(T_2, Ctx->getConstantInt1(1));	2161 _and(T_2, Ctx->getConstantInt1(1));

2193 _mov(Dest, T_2);	2162 _mov(Dest, T_2);

2194 }	2163 }

2195 break;	2164 break;

2196 case InstCast::Fptoui:	2165 case InstCast::Fptoui:

2197 if (isVectorType(DestTy)) {	2166 if (isVectorType(DestTy)) {

2198 assert(DestTy == IceType_v4i32 &&	2167 llvm::report_fatal_error("Helper call was expected");

2199 Inst->getSrc(0)->getType() == IceType_v4f32);

2200 constexpr SizeT MaxSrcs = 1;

2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);

2202 Call->addArg(Inst->getSrc(0));

2203 lowerCall(Call);

2204 } else if (DestTy == IceType_i64 ||	2168 } else if (DestTy == IceType_i64 ||

2205 (!Traits::Is64Bit && DestTy == IceType_i32)) {	2169 (!Traits::Is64Bit && DestTy == IceType_i32)) {

2206 // Use a helper for both x86-32 and x86-64.	2170 llvm::report_fatal_error("Helper call was expected");

2207 constexpr SizeT MaxSrcs = 1;

2208 Type SrcType = Inst->getSrc(0)->getType();

2209 IceString TargetString;

2210 if (Traits::Is64Bit) {

2211 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64

2212 : H_fptoui_f64_i64;

2213 } else if (isInt32Asserting32Or64(DestTy)) {

2214 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32

2215 : H_fptoui_f64_i32;

2216 } else {

2217 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64

2218 : H_fptoui_f64_i64;

2219 }

2220 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

2221 Call->addArg(Inst->getSrc(0));

2222 lowerCall(Call);

2223 return;

2224 } else {	2171 } else {

2225 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);	2172 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

2226 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type	2173 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

2227 assert(DestTy != IceType_i64);	2174 assert(DestTy != IceType_i64);

2228 Variable *T_1 = nullptr;	2175 Variable *T_1 = nullptr;

2229 if (Traits::Is64Bit && DestTy == IceType_i32) {	2176 if (Traits::Is64Bit && DestTy == IceType_i32) {

2230 T_1 = makeReg(IceType_i64);	2177 T_1 = makeReg(IceType_i64);

2231 } else {	2178 } else {

2232 assert(DestTy != IceType_i32);	2179 assert(DestTy != IceType_i32);

2233 T_1 = makeReg(IceType_i32);	2180 T_1 = makeReg(IceType_i32);

(...skipping 15 matching lines...) Expand all Loading...
2249 if (isVectorType(DestTy)) {	2196 if (isVectorType(DestTy)) {

2250 assert(DestTy == IceType_v4f32 &&	2197 assert(DestTy == IceType_v4f32 &&

2251 Inst->getSrc(0)->getType() == IceType_v4i32);	2198 Inst->getSrc(0)->getType() == IceType_v4i32);

2252 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);	2199 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

2253 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))	2200 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))

2254 Src0RM = legalizeToReg(Src0RM);	2201 Src0RM = legalizeToReg(Src0RM);

2255 Variable *T = makeReg(DestTy);	2202 Variable *T = makeReg(DestTy);

2256 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);	2203 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);

2257 _movp(Dest, T);	2204 _movp(Dest, T);

2258 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {	2205 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {

2259 // Use a helper for x86-32.	2206 llvm::report_fatal_error("Helper call was expected");

2260 constexpr SizeT MaxSrcs = 1;

2261 InstCall *Call =

2262 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32

2263 : H_sitofp_i64_f64,

2264 Dest, MaxSrcs);

2265 // TODO: Call the correct compiler-rt helper function.

2266 Call->addArg(Inst->getSrc(0));

2267 lowerCall(Call);

2268 return;

2269 } else {	2207 } else {

2270 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);	2208 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);

2271 // Sign-extend the operand.	2209 // Sign-extend the operand.

2272 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2	2210 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2

2273 Variable *T_1 = nullptr;	2211 Variable *T_1 = nullptr;

2274 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {	2212 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {

2275 T_1 = makeReg(IceType_i64);	2213 T_1 = makeReg(IceType_i64);

2276 } else {	2214 } else {

2277 assert(Src0RM->getType() != IceType_i64);	2215 assert(Src0RM->getType() != IceType_i64);

2278 T_1 = makeReg(IceType_i32);	2216 T_1 = makeReg(IceType_i32);

2279 }	2217 }

2280 Variable *T_2 = makeReg(DestTy);	2218 Variable *T_2 = makeReg(DestTy);

2281 if (Src0RM->getType() == T_1->getType())	2219 if (Src0RM->getType() == T_1->getType())

2282 _mov(T_1, Src0RM);	2220 _mov(T_1, Src0RM);

2283 else	2221 else

2284 _movsx(T_1, Src0RM);	2222 _movsx(T_1, Src0RM);

2285 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);	2223 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);

2286 _mov(Dest, T_2);	2224 _mov(Dest, T_2);

2287 }	2225 }

2288 break;	2226 break;

2289 case InstCast::Uitofp: {	2227 case InstCast::Uitofp: {

2290 Operand *Src0 = Inst->getSrc(0);	2228 Operand *Src0 = Inst->getSrc(0);

2291 if (isVectorType(Src0->getType())) {	2229 if (isVectorType(Src0->getType())) {

2292 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);	2230 llvm::report_fatal_error("Helper call was expected");

2293 constexpr SizeT MaxSrcs = 1;

2294 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);

2295 Call->addArg(Src0);

2296 lowerCall(Call);

2297 } else if (Src0->getType() == IceType_i64 ||	2231 } else if (Src0->getType() == IceType_i64 ||

2298 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {	2232 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {

2299 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on	2233 llvm::report_fatal_error("Helper call was expected");

2300 // x86-32.

2301 constexpr SizeT MaxSrcs = 1;

2302 IceString TargetString;

2303 if (isInt32Asserting32Or64(Src0->getType())) {

2304 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32

2305 : H_uitofp_i32_f64;

2306 } else {

2307 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32

2308 : H_uitofp_i64_f64;

2309 }

2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

2311 Call->addArg(Src0);

2312 lowerCall(Call);

2313 return;

2314 } else {	2234 } else {

2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);	2235 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

2316 // Zero-extend the operand.	2236 // Zero-extend the operand.

2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2	2237 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2

2318 Variable *T_1 = nullptr;	2238 Variable *T_1 = nullptr;

2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {	2239 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {

2320 T_1 = makeReg(IceType_i64);	2240 T_1 = makeReg(IceType_i64);

2321 } else {	2241 } else {

2322 assert(Src0RM->getType() != IceType_i64);	2242 assert(Src0RM->getType() != IceType_i64);

2323 assert(Traits::Is64Bit \|\| Src0RM->getType() != IceType_i32);	2243 assert(Traits::Is64Bit \|\| Src0RM->getType() != IceType_i32);

(...skipping 13 matching lines...) Expand all Loading...
2337 Operand *Src0 = Inst->getSrc(0);	2257 Operand *Src0 = Inst->getSrc(0);

2338 if (DestTy == Src0->getType()) {	2258 if (DestTy == Src0->getType()) {

2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);	2259 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);

2340 lowerAssign(Assign);	2260 lowerAssign(Assign);

2341 return;	2261 return;

2342 }	2262 }

2343 switch (DestTy) {	2263 switch (DestTy) {

2344 default:	2264 default:

2345 llvm_unreachable("Unexpected Bitcast dest type");	2265 llvm_unreachable("Unexpected Bitcast dest type");

2346 case IceType_i8: {	2266 case IceType_i8: {

2347 assert(Src0->getType() == IceType_v8i1);	2267 llvm::report_fatal_error("Helper call was expected");

2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);

2349 Call->addArg(Src0);

2350 lowerCall(Call);

2351 } break;	2268 } break;

2352 case IceType_i16: {	2269 case IceType_i16: {

2353 assert(Src0->getType() == IceType_v16i1);	2270 llvm::report_fatal_error("Helper call was expected");

2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);

2355 Call->addArg(Src0);

2356 lowerCall(Call);

2357 } break;	2271 } break;

2358 case IceType_i32:	2272 case IceType_i32:

2359 case IceType_f32: {	2273 case IceType_f32: {

2360 Operand *Src0RM = legalize(Src0, Legal_Reg \| Legal_Mem);	2274 Operand *Src0RM = legalize(Src0, Legal_Reg \| Legal_Mem);

2361 Type SrcType = Src0RM->getType();	2275 Type SrcType = Src0RM->getType();

2362 assert((DestTy == IceType_i32 && SrcType == IceType_f32) \|\|	2276 assert((DestTy == IceType_i32 && SrcType == IceType_f32) \|\|

2363 (DestTy == IceType_f32 && SrcType == IceType_i32));	2277 (DestTy == IceType_f32 && SrcType == IceType_i32));

2364 // a.i32 = bitcast b.f32 ==>	2278 // a.i32 = bitcast b.f32 ==>

2365 // t.f32 = b.f32	2279 // t.f32 = b.f32

2366 // s.f32 = spill t.f32	2280 // s.f32 = spill t.f32

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2462 // SpillLo is considered a "use" of Spill so define Spill before it is	2376 // SpillLo is considered a "use" of Spill so define Spill before it is

2463 // used.	2377 // used.

2464 Context.insert(InstFakeDef::create(Func, Spill));	2378 Context.insert(InstFakeDef::create(Func, Spill));

2465 _store(T_Lo, SpillLo);	2379 _store(T_Lo, SpillLo);

2466 _mov(T_Hi, hiOperand(Src0));	2380 _mov(T_Hi, hiOperand(Src0));

2467 _store(T_Hi, SpillHi);	2381 _store(T_Hi, SpillHi);

2468 _movq(Dest, Spill);	2382 _movq(Dest, Spill);

2469 }	2383 }

2470 } break;	2384 } break;

2471 case IceType_v8i1: {	2385 case IceType_v8i1: {

2472 assert(Src0->getType() == IceType_i8);	2386 llvm::report_fatal_error("Helper call was expected");

2473 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);

2474 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());

2475 // Arguments to functions are required to be at least 32 bits wide.

2476 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

2477 Call->addArg(Src0AsI32);

2478 lowerCall(Call);

2479 } break;	2387 } break;

2480 case IceType_v16i1: {	2388 case IceType_v16i1: {

2481 assert(Src0->getType() == IceType_i16);	2389 llvm::report_fatal_error("Helper call was expected");

2482 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);

2483 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());

2484 // Arguments to functions are required to be at least 32 bits wide.

2485 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

2486 Call->addArg(Src0AsI32);

2487 lowerCall(Call);

2488 } break;	2390 } break;

2489 case IceType_v8i16:	2391 case IceType_v8i16:

2490 case IceType_v16i8:	2392 case IceType_v16i8:

2491 case IceType_v4i32:	2393 case IceType_v4i32:

2492 case IceType_v4f32: {	2394 case IceType_v4f32: {

2493 _movp(Dest, legalizeToReg(Src0));	2395 _movp(Dest, legalizeToReg(Src0));

2494 } break;	2396 } break;

2495 }	2397 }

2496 break;	2398 break;

2497 }	2399 }

(...skipping 2661 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5159 Type Ty = Dest->getType();	5061 Type Ty = Dest->getType();

5160 Type ElementTy = typeElementType(Ty);	5062 Type ElementTy = typeElementType(Ty);

5161 SizeT NumElements = typeNumElements(Ty);	5063 SizeT NumElements = typeNumElements(Ty);

5162	5064

5163 Operand *T = Ctx->getConstantUndef(Ty);	5065 Operand *T = Ctx->getConstantUndef(Ty);

5164 for (SizeT I = 0; I < NumElements; ++I) {	5066 for (SizeT I = 0; I < NumElements; ++I) {

5165 Constant *Index = Ctx->getConstantInt32(I);	5067 Constant *Index = Ctx->getConstantInt32(I);

5166	5068

5167 // Extract the next two inputs.	5069 // Extract the next two inputs.

5168 Variable *Op0 = Func->makeVariable(ElementTy);	5070 Variable *Op0 = Func->makeVariable(ElementTy);

5169 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));	5071 Context.insert(InstExtractElement::create(Func, Op0, Src0, Index));

5170 Variable *Op1 = Func->makeVariable(ElementTy);	5072 Variable *Op1 = Func->makeVariable(ElementTy);

5171 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));	5073 Context.insert(InstExtractElement::create(Func, Op1, Src1, Index));

5172	5074

5173 // Perform the arithmetic as a scalar operation.	5075 // Perform the arithmetic as a scalar operation.

5174 Variable *Res = Func->makeVariable(ElementTy);	5076 Variable *Res = Func->makeVariable(ElementTy);

5175 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));	5077 auto *Arith = InstArithmetic::create(Func, Kind, Res, Op0, Op1);

	5078 Context.insert(Arith);

	5079 // We might have created an operation that needed a helper call.

	5080 genTargetHelperCallFor(Arith);

5176	5081

5177 // Insert the result into position.	5082 // Insert the result into position.

5178 Variable *DestT = Func->makeVariable(Ty);	5083 Variable *DestT = Func->makeVariable(Ty);

5179 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));	5084 Context.insert(InstInsertElement::create(Func, DestT, T, Res, Index));

5180 T = DestT;	5085 T = DestT;

5181 }	5086 }

5182	5087

5183 lowerAssign(InstAssign::create(Func, Dest, T));	5088 Context.insert(InstAssign::create(Func, Dest, T));

5184 }	5089 }

5185	5090

5186 /// The following pattern occurs often in lowered C and C++ code:	5091 /// The following pattern occurs often in lowered C and C++ code:

5187 ///	5092 ///

5188 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1	5093 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1

5189 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>	5094 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>

5190 ///	5095 ///

5191 /// We can eliminate the sext operation by copying the result of pcmpeqd,	5096 /// We can eliminate the sext operation by copying the result of pcmpeqd,

5192 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the	5097 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the

5193 /// sext operation.	5098 /// sext operation.

(...skipping 122 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5316 }	5221 }

5317	5222

5318 // Pause constant blinding or pooling, blinding or pooling will be done later	5223 // Pause constant blinding or pooling, blinding or pooling will be done later

5319 // during phi lowering assignments	5224 // during phi lowering assignments

5320 BoolFlagSaver B(RandomizationPoolingPaused, true);	5225 BoolFlagSaver B(RandomizationPoolingPaused, true);

5321 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(	5226 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(

5322 this, Context.getNode(), Func);	5227 this, Context.getNode(), Func);

5323 }	5228 }

5324	5229

5325 template <class Machine>	5230 template <class Machine>

5326 uint32_t	5231 void TargetX86Base<Machine>::genTargetHelperCallFor(Inst *Instr) {

5327 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) {	5232 uint32_t StackArgumentsSize = 0;

	5233 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {

	5234 const char *HelperName = nullptr;

	5235 Variable *Dest = Arith->getDest();

	5236 Type DestTy = Dest->getType();

	5237 if (!Traits::Is64Bit && DestTy == IceType_i64) {

	5238 switch (Arith->getOp()) {

	5239 default:

	5240 return;

	5241 case InstArithmetic::Udiv:

	5242 HelperName = H_udiv_i64;

	5243 break;

	5244 case InstArithmetic::Sdiv:

	5245 HelperName = H_sdiv_i64;

	5246 break;

	5247 case InstArithmetic::Urem:

	5248 HelperName = H_urem_i64;

	5249 break;

	5250 case InstArithmetic::Srem:

	5251 HelperName = H_srem_i64;

	5252 break;

	5253 }

	5254 } else if (isVectorType(DestTy)) {

	5255 Variable *Dest = Arith->getDest();

	5256 Operand *Src0 = Arith->getSrc(0);

	5257 Operand *Src1 = Arith->getSrc(1);

	5258 switch (Arith->getOp()) {

	5259 default:

	5260 return;

	5261 case InstArithmetic::Mul:

	5262 if (DestTy == IceType_v16i8) {

	5263 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);

	5264 Arith->setDeleted();

	5265 }

	5266 return;

	5267 case InstArithmetic::Shl:

	5268 case InstArithmetic::Lshr:

	5269 case InstArithmetic::Ashr:

	5270 case InstArithmetic::Udiv:

	5271 case InstArithmetic::Urem:

	5272 case InstArithmetic::Sdiv:

	5273 case InstArithmetic::Srem:

	5274 case InstArithmetic::Frem:

	5275 scalarizeArithmetic(Arith->getOp(), Dest, Src0, Src1);

	5276 Arith->setDeleted();

	5277 return;

	5278 }

	5279 } else {

	5280 switch (Arith->getOp()) {

	5281 default:

	5282 return;

	5283 case InstArithmetic::Frem:

	5284 if (isFloat32Asserting32Or64(DestTy))

	5285 HelperName = H_frem_f32;

	5286 else

	5287 HelperName = H_frem_f64;

	5288 }

	5289 }

	5290 constexpr SizeT MaxSrcs = 2;

	5291 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);

	5292 Call->addArg(Arith->getSrc(0));

	5293 Call->addArg(Arith->getSrc(1));

	5294 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);

	5295 Context.insert(Call);

	5296 Arith->setDeleted();

	5297 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {

	5298 InstCast::OpKind CastKind = Cast->getCastKind();

	5299 Operand *Src0 = Cast->getSrc(0);

	5300 const Type SrcType = Src0->getType();

	5301 Variable *Dest = Cast->getDest();

	5302 const Type DestTy = Dest->getType();

	5303 const char *HelperName = nullptr;

	5304 switch (CastKind) {

	5305 default:

	5306 return;

	5307 case InstCast::Fptosi:

	5308 if (!Traits::Is64Bit && DestTy == IceType_i64) {

	5309 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64

	5310 : H_fptosi_f64_i64;

	5311 } else {

	5312 return;

	5313 }

	5314 break;

	5315 case InstCast::Fptoui:

	5316 if (isVectorType(DestTy)) {

	5317 assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);

	5318 HelperName = H_fptoui_4xi32_f32;

	5319 } else if (DestTy == IceType_i64 \|\|

	5320 (!Traits::Is64Bit && DestTy == IceType_i32)) {

	5321 if (Traits::Is64Bit) {

	5322 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64

	5323 : H_fptoui_f64_i64;

	5324 } else if (isInt32Asserting32Or64(DestTy)) {

	5325 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32

	5326 : H_fptoui_f64_i32;

	5327 } else {

	5328 HelperName = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64

	5329 : H_fptoui_f64_i64;

	5330 }

	5331 } else {

	5332 return;

	5333 }

	5334 break;

	5335 case InstCast::Sitofp:

	5336 if (!Traits::Is64Bit && SrcType == IceType_i64) {

	5337 HelperName = isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32

	5338 : H_sitofp_i64_f64;

	5339 } else {

	5340 return;

	5341 }

	5342 break;

	5343 case InstCast::Uitofp:

	5344 if (isVectorType(SrcType)) {

	5345 assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);

	5346 HelperName = H_uitofp_4xi32_4xf32;

	5347 } else if (SrcType == IceType_i64 \|\|

	5348 (!Traits::Is64Bit && SrcType == IceType_i32)) {

	5349 if (isInt32Asserting32Or64(SrcType)) {

	5350 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32

	5351 : H_uitofp_i32_f64;

	5352 } else {

	5353 HelperName = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32

	5354 : H_uitofp_i64_f64;

	5355 }

	5356 } else {

	5357 return;

	5358 }

	5359 break;

	5360 case InstCast::Bitcast: {

	5361 if (DestTy == Src0->getType())

	5362 return;

	5363 switch (DestTy) {

	5364 default:

	5365 return;

	5366 case IceType_i8:

	5367 assert(Src0->getType() == IceType_v8i1);

	5368 HelperName = H_bitcast_8xi1_i8;

	5369 break;

	5370 case IceType_i16:

	5371 assert(Src0->getType() == IceType_v16i1);

	5372 HelperName = H_bitcast_16xi1_i16;

	5373 break;

	5374 case IceType_v8i1: {

	5375 assert(Src0->getType() == IceType_i8);

	5376 HelperName = H_bitcast_i8_8xi1;

	5377 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());

	5378 // Arguments to functions are required to be at least 32 bits wide.

	5379 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

	5380 Src0 = Src0AsI32;

	5381 } break;

	5382 case IceType_v16i1: {

	5383 assert(Src0->getType() == IceType_i16);

	5384 HelperName = H_bitcast_i16_16xi1;

	5385 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());

	5386 // Arguments to functions are required to be at least 32 bits wide.

	5387 Context.insert(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));

	5388 Src0 = Src0AsI32;

	5389 } break;

	5390 }

	5391 } break;

	5392 }

	5393 constexpr SizeT MaxSrcs = 1;

	5394 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);

	5395 Call->addArg(Src0);

	5396 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);

	5397 Context.insert(Call);

	5398 Cast->setDeleted();

	5399 } else if (auto *Intrinsic = llvm::dyn_cast<InstIntrinsicCall>(Instr)) {

	5400 std::vector<Type> ArgTypes;

	5401 Type ReturnType = IceType_void;

	5402 switch (Intrinsics::IntrinsicID ID = Intrinsic->getIntrinsicInfo().ID) {

	5403 default:

	5404 return;

	5405 case Intrinsics::Ctpop: {

	5406 Operand *Val = Intrinsic->getArg(0);

	5407 Type ValTy = Val->getType();

	5408 if (ValTy == IceType_i64)

	5409 ArgTypes = {IceType_i64};

	5410 else

	5411 ArgTypes = {IceType_i32};

	5412 ReturnType = IceType_i32;

	5413 } break;

	5414 case Intrinsics::Longjmp:

	5415 ArgTypes = {IceType_i32, IceType_i32};

	5416 ReturnType = IceType_void;

	5417 break;

	5418 case Intrinsics::Memcpy:

	5419 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};

	5420 ReturnType = IceType_void;

	5421 break;

	5422 case Intrinsics::Memmove:

	5423 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};

	5424 ReturnType = IceType_void;

	5425 break;

	5426 case Intrinsics::Memset:

	5427 ArgTypes = {IceType_i32, IceType_i32, IceType_i32};

	5428 ReturnType = IceType_void;

	5429 break;

	5430 case Intrinsics::NaClReadTP:

	5431 ReturnType = IceType_i32;

	5432 break;

	5433 case Intrinsics::Setjmp:

	5434 ArgTypes = {IceType_i32};

	5435 ReturnType = IceType_i32;

	5436 break;

	5437 }

	5438 StackArgumentsSize = getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);

	5439 } else if (auto *Call = llvm::dyn_cast<InstCall>(Instr)) {

	5440 StackArgumentsSize = getCallStackArgumentsSizeBytes(Call);

	5441 } else if (auto *Ret = llvm::dyn_cast<InstRet>(Instr)) {

	5442 if (!Ret->hasRetValue())

	5443 return;

	5444 Operand *RetValue = Ret->getRetValue();

	5445 Type ReturnType = RetValue->getType();

	5446 if (!isScalarFloatingType(ReturnType))

	5447 return;

	5448 StackArgumentsSize = typeWidthInBytes(ReturnType);

	5449 } else {

	5450 return;

	5451 }

	5452 StackArgumentsSize = Traits::applyStackAlignment(StackArgumentsSize);

	5453 updateMaxOutArgsSizeBytes(StackArgumentsSize);

	5454 }

	5455

	5456 template <class Machine>

	5457 uint32_t TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(

	5458 const std::vector<Type> &ArgTypes, Type ReturnType) {

5328 uint32_t OutArgumentsSizeBytes = 0;	5459 uint32_t OutArgumentsSizeBytes = 0;

5329 uint32_t XmmArgCount = 0;	5460 uint32_t XmmArgCount = 0;

5330 uint32_t GprArgCount = 0;	5461 uint32_t GprArgCount = 0;

5331 // Classify each argument operand according to the location where the	5462 for (Type Ty : ArgTypes) {

5332 // argument is passed.

5333 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {

5334 Operand *Arg = Instr->getArg(i);

5335 Type Ty = Arg->getType();

5336 // The PNaCl ABI requires the width of arguments to be at least 32 bits.	5463 // The PNaCl ABI requires the width of arguments to be at least 32 bits.

5337 assert(typeWidthInBytes(Ty) >= 4);	5464 assert(typeWidthInBytes(Ty) >= 4);

5338 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {	5465 if (isVectorType(Ty) && XmmArgCount < Traits::X86_MAX_XMM_ARGS) {

5339 ++XmmArgCount;	5466 ++XmmArgCount;

5340 } else if (isScalarIntegerType(Ty) &&	5467 } else if (isScalarIntegerType(Ty) &&

5341 GprArgCount < Traits::X86_MAX_GPR_ARGS) {	5468 GprArgCount < Traits::X86_MAX_GPR_ARGS) {

5342 // The 64 bit ABI allows some integers to be passed in GPRs.	5469 // The 64 bit ABI allows some integers to be passed in GPRs.

5343 ++GprArgCount;	5470 ++GprArgCount;

5344 } else {	5471 } else {

5345 if (isVectorType(Arg->getType())) {	5472 if (isVectorType(Ty)) {

5346 OutArgumentsSizeBytes =	5473 OutArgumentsSizeBytes =

5347 Traits::applyStackAlignment(OutArgumentsSizeBytes);	5474 Traits::applyStackAlignment(OutArgumentsSizeBytes);

5348 }	5475 }

5349 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Arg->getType());	5476 OutArgumentsSizeBytes += typeWidthInBytesOnStack(Ty);

5350 }	5477 }

5351 }	5478 }

5352 if (Traits::Is64Bit)	5479 if (Traits::Is64Bit)

5353 return OutArgumentsSizeBytes;	5480 return OutArgumentsSizeBytes;

5354 // The 32 bit ABI requires floating point values to be returned on the x87 FP	5481 // The 32 bit ABI requires floating point values to be returned on the x87 FP

5355 // stack. Ensure there is enough space for the fstp/movs for floating returns.	5482 // stack. Ensure there is enough space for the fstp/movs for floating returns.

5356 Variable *Dest = Instr->getDest();	5483 if (isScalarFloatingType(ReturnType)) {

5357 if (Dest == nullptr)

5358 return OutArgumentsSizeBytes;

5359 const Type DestType = Dest->getType();

5360 if (isScalarFloatingType(Dest->getType())) {

5361 OutArgumentsSizeBytes =	5484 OutArgumentsSizeBytes =

5362 std::max(OutArgumentsSizeBytes,	5485 std::max(OutArgumentsSizeBytes,

5363 static_cast<uint32_t>(typeWidthInBytesOnStack(DestType)));	5486 static_cast<uint32_t>(typeWidthInBytesOnStack(ReturnType)));

5364 }	5487 }

5365 return OutArgumentsSizeBytes;	5488 return OutArgumentsSizeBytes;

5366 }	5489 }

5367	5490

5368 template <class Machine>	5491 template <class Machine>

	5492 uint32_t

	5493 TargetX86Base<Machine>::getCallStackArgumentsSizeBytes(const InstCall *Instr) {

	5494 // Build a vector of the arguments' types.

	5495 std::vector<Type> ArgTypes;

	5496 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {

	5497 Operand *Arg = Instr->getArg(i);

	5498 ArgTypes.emplace_back(Arg->getType());

	5499 }

	5500 // Compute the return type (if any);

	5501 Type ReturnType = IceType_void;

	5502 Variable *Dest = Instr->getDest();

	5503 if (Dest != nullptr)

	5504 ReturnType = Dest->getType();

	5505 return getCallStackArgumentsSizeBytes(ArgTypes, ReturnType);

	5506 }

	5507

	5508 template <class Machine>

5369 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {	5509 Variable *TargetX86Base<Machine>::makeZeroedRegister(Type Ty, int32_t RegNum) {

5370 Variable *Reg = makeReg(Ty, RegNum);	5510 Variable *Reg = makeReg(Ty, RegNum);

5371 switch (Ty) {	5511 switch (Ty) {

5372 case IceType_i1:	5512 case IceType_i1:

5373 case IceType_i8:	5513 case IceType_i8:

5374 case IceType_i16:	5514 case IceType_i16:

5375 case IceType_i32:	5515 case IceType_i32:

5376 case IceType_i64:	5516 case IceType_i64:

5377 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.	5517 // Conservatively do "mov reg, 0" to avoid modifying FLAGS.

5378 _mov(Reg, Ctx->getConstantZero(Ty));	5518 _mov(Reg, Ctx->getConstantZero(Ty));

(...skipping 687 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6066 }	6206 }

6067 // the offset is not eligible for blinding or pooling, return the original	6207 // the offset is not eligible for blinding or pooling, return the original

6068 // mem operand	6208 // mem operand

6069 return MemOperand;	6209 return MemOperand;

6070 }	6210 }

6071	6211

6072 } // end of namespace X86Internal	6212 } // end of namespace X86Internal

6073 } // end of namespace Ice	6213 } // end of namespace Ice

6074	6214

6075 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	6215 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/assembler/x86/sandboxing.ll » ('j') | no next file with comments »