src/IceTargetLoweringX86BaseImpl.h - Issue 1419903002: Subzero: Refactor x86 register definitions to use the alias mechanism.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1419903002: Subzero: Refactor x86 register definitions to use the alias mechanism. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Fix assembler unit tests. Fix register names. Code review changes. Rebase. Created 5 years, 1 month ago.

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 569 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
580 Node->getInsts().insert(I3, RMW);	580 Node->getInsts().insert(I3, RMW);

581 }	581 }

582 }	582 }

583 if (Func->isVerbose(IceV_RMW))	583 if (Func->isVerbose(IceV_RMW))

584 Func->getContext()->unlockStr();	584 Func->getContext()->unlockStr();

585 }	585 }

586	586

587 // Converts a ConstantInteger32 operand into its constant value, or	587 // Converts a ConstantInteger32 operand into its constant value, or

588 // MemoryOrderInvalid if the operand is not a ConstantInteger32.	588 // MemoryOrderInvalid if the operand is not a ConstantInteger32.

589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {	589 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {

590 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))	590 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))

591 return Integer->getValue();	591 return Integer->getValue();

592 return Intrinsics::MemoryOrderInvalid;	592 return Intrinsics::MemoryOrderInvalid;

593 }	593 }

594	594

595 /// Determines whether the dest of a Load instruction can be folded into one of	595 /// Determines whether the dest of a Load instruction can be folded into one of

596 /// the src operands of a 2-operand instruction. This is true as long as the	596 /// the src operands of a 2-operand instruction. This is true as long as the

597 /// load dest matches exactly one of the binary instruction's src operands.	597 /// load dest matches exactly one of the binary instruction's src operands.

598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.	598 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.

599 inline bool canFoldLoadIntoBinaryInst(Operand LoadSrc, Variable LoadDest,	599 inline bool canFoldLoadIntoBinaryInst(Operand LoadSrc, Variable LoadDest,

600 Operand &Src0, Operand &Src1) {	600 Operand &Src0, Operand &Src1) {

(...skipping 14 matching lines...) Expand all Loading...
615 while (!Context.atEnd()) {	615 while (!Context.atEnd()) {

616 Variable *LoadDest = nullptr;	616 Variable *LoadDest = nullptr;

617 Operand *LoadSrc = nullptr;	617 Operand *LoadSrc = nullptr;

618 Inst *CurInst = Context.getCur();	618 Inst *CurInst = Context.getCur();

619 Inst *Next = Context.getNextInst();	619 Inst *Next = Context.getNextInst();

620 // Determine whether the current instruction is a Load instruction or	620 // Determine whether the current instruction is a Load instruction or

621 // equivalent.	621 // equivalent.

622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {	622 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {

623 // An InstLoad always qualifies.	623 // An InstLoad always qualifies.

624 LoadDest = Load->getDest();	624 LoadDest = Load->getDest();

625 const bool DoLegalize = false;	625 constexpr bool DoLegalize = false;

626 LoadSrc = formMemoryOperand(Load->getSourceAddress(),	626 LoadSrc = formMemoryOperand(Load->getSourceAddress(),

627 LoadDest->getType(), DoLegalize);	627 LoadDest->getType(), DoLegalize);

628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {	628 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {

629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory	629 // An AtomicLoad intrinsic qualifies as long as it has a valid memory

630 // ordering, and can be implemented in a single instruction (i.e., not	630 // ordering, and can be implemented in a single instruction (i.e., not

631 // i64 on x86-32).	631 // i64 on x86-32).

632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;	632 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;

633 if (ID == Intrinsics::AtomicLoad &&	633 if (ID == Intrinsics::AtomicLoad &&

634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&	634 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&

635 Intrinsics::isMemoryOrderValid(	635 Intrinsics::isMemoryOrderValid(

636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {	636 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {

637 LoadDest = Intrin->getDest();	637 LoadDest = Intrin->getDest();

638 const bool DoLegalize = false;	638 constexpr bool DoLegalize = false;

639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),	639 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),

640 DoLegalize);	640 DoLegalize);

641 }	641 }

642 }	642 }

643 // A Load instruction can be folded into the following instruction only	643 // A Load instruction can be folded into the following instruction only

644 // if the following instruction ends the Load's Dest variable's live	644 // if the following instruction ends the Load's Dest variable's live

645 // range.	645 // range.

646 if (LoadDest && Next && Next->isLastUse(LoadDest)) {	646 if (LoadDest && Next && Next->isLastUse(LoadDest)) {

647 assert(LoadSrc);	647 assert(LoadSrc);

648 Inst *NewInst = nullptr;	648 Inst *NewInst = nullptr;

(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
726 // considered live upon function entry. Otherwise it's possible to get	726 // considered live upon function entry. Otherwise it's possible to get

727 // liveness validation errors for saving callee-save registers.	727 // liveness validation errors for saving callee-save registers.

728 Func->addImplicitArg(Reg);	728 Func->addImplicitArg(Reg);

729 // Don't bother tracking the live range of a named physical register.	729 // Don't bother tracking the live range of a named physical register.

730 Reg->setIgnoreLiveness();	730 Reg->setIgnoreLiveness();

731 }	731 }

732 return Reg;	732 return Reg;

733 }	733 }

734	734

735 template <class Machine>	735 template <class Machine>

736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {	736 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type) const {

737 return Traits::getRegName(RegNum, Ty);	737 return Traits::getRegName(RegNum);

738 }	738 }

739	739

740 template <class Machine>	740 template <class Machine>

741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {	741 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {

742 if (!BuildDefs::dump())	742 if (!BuildDefs::dump())

743 return;	743 return;

744 Ostream &Str = Ctx->getStrEmit();	744 Ostream &Str = Ctx->getStrEmit();

745 if (Var->hasReg()) {	745 if (Var->hasReg()) {

746 Str << "%" << getRegName(Var->getRegNum(), Var->getType());	746 Str << "%" << getRegName(Var->getRegNum(), Var->getType());

747 return;	747 return;

(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
790 if (Var->mustHaveReg()) {	790 if (Var->mustHaveReg()) {

791 llvm_unreachable("Infinite-weight Variable has no register assigned");	791 llvm_unreachable("Infinite-weight Variable has no register assigned");

792 }	792 }

793 int32_t Offset = Var->getStackOffset();	793 int32_t Offset = Var->getStackOffset();

794 int32_t BaseRegNum = Var->getBaseRegNum();	794 int32_t BaseRegNum = Var->getBaseRegNum();

795 if (Var->getBaseRegNum() == Variable::NoRegister) {	795 if (Var->getBaseRegNum() == Variable::NoRegister) {

796 BaseRegNum = getFrameOrStackReg();	796 BaseRegNum = getFrameOrStackReg();

797 if (!hasFramePointer())	797 if (!hasFramePointer())

798 Offset += getStackAdjustment();	798 Offset += getStackAdjustment();

799 }	799 }

800 return typename Traits::Address(	800 return typename Traits::Address(Traits::getEncodedGPR(BaseRegNum), Offset,

801 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset,	801 AssemblerFixup::NoFixup);

802 AssemblerFixup::NoFixup);

803 }	802 }

804	803

805 /// Helper function for addProlog().	804 /// Helper function for addProlog().

806 ///	805 ///

807 /// This assumes Arg is an argument passed on the stack. This sets the frame	806 /// This assumes Arg is an argument passed on the stack. This sets the frame

808 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an	807 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an

809 /// I64 arg that has been split into Lo and Hi components, it calls itself	808 /// I64 arg that has been split into Lo and Hi components, it calls itself

810 /// recursively on the components, taking care to handle Lo first because of the	809 /// recursively on the components, taking care to handle Lo first because of the

811 /// little-endian architecture. Lastly, this function generates an instruction	810 /// little-endian architecture. Lastly, this function generates an instruction

812 /// to copy Arg into its assigned register if applicable.	811 /// to copy Arg into its assigned register if applicable.

(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1041 Src1 /= 2;	1040 Src1 /= 2;

1042 } else {	1041 } else {

1043 return false;	1042 return false;

1044 }	1043 }

1045 }	1044 }

1046 // Lea optimization only works for i16 and i32 types, not i8.	1045 // Lea optimization only works for i16 and i32 types, not i8.

1047 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))	1046 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))

1048 return false;	1047 return false;

1049 // Limit the number of lea/shl operations for a single multiply, to a	1048 // Limit the number of lea/shl operations for a single multiply, to a

1050 // somewhat arbitrary choice of 3.	1049 // somewhat arbitrary choice of 3.

1051 const uint32_t MaxOpsForOptimizedMul = 3;	1050 constexpr uint32_t MaxOpsForOptimizedMul = 3;

1052 if (CountOps > MaxOpsForOptimizedMul)	1051 if (CountOps > MaxOpsForOptimizedMul)

1053 return false;	1052 return false;

1054 _mov(T, Src0);	1053 _mov(T, Src0);

1055 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1054 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1056 for (uint32_t i = 0; i < Count9; ++i) {	1055 for (uint32_t i = 0; i < Count9; ++i) {

1057 const uint16_t Shift = 3; // log2(9-1)	1056 constexpr uint16_t Shift = 3; // log2(9-1)

1058 _lea(T,	1057 _lea(T,

1059 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1058 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1060 }	1059 }

1061 for (uint32_t i = 0; i < Count5; ++i) {	1060 for (uint32_t i = 0; i < Count5; ++i) {

1062 const uint16_t Shift = 2; // log2(5-1)	1061 constexpr uint16_t Shift = 2; // log2(5-1)

1063 _lea(T,	1062 _lea(T,

1064 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1063 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1065 }	1064 }

1066 for (uint32_t i = 0; i < Count3; ++i) {	1065 for (uint32_t i = 0; i < Count3; ++i) {

1067 const uint16_t Shift = 1; // log2(3-1)	1066 constexpr uint16_t Shift = 1; // log2(3-1)

1068 _lea(T,	1067 _lea(T,

1069 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));	1068 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));

1070 }	1069 }

1071 if (Count2) {	1070 if (Count2) {

1072 _shl(T, Ctx->getConstantInt(Ty, Count2));	1071 _shl(T, Ctx->getConstantInt(Ty, Count2));

1073 }	1072 }

1074 if (Src1IsNegative)	1073 if (Src1IsNegative)

1075 _neg(T);	1074 _neg(T);

1076 _mov(Dest, T);	1075 _mov(Dest, T);

1077 return true;	1076 return true;

(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1209 }	1208 }

1210 } else {	1209 } else {

1211 // NON-CONSTANT CASES.	1210 // NON-CONSTANT CASES.

1212 Constant *BitTest = Ctx->getConstantInt32(0x20);	1211 Constant *BitTest = Ctx->getConstantInt32(0x20);

1213 typename Traits::Insts::Label *Label =	1212 typename Traits::Insts::Label *Label =

1214 Traits::Insts::Label::create(Func, this);	1213 Traits::Insts::Label::create(Func, this);

1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==>	1214 // COMMON PREFIX OF: a=b SHIFT_OP c ==>

1216 // t1:ecx = c.lo & 0xff	1215 // t1:ecx = c.lo & 0xff

1217 // t2 = b.lo	1216 // t2 = b.lo

1218 // t3 = b.hi	1217 // t3 = b.hi

1219 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);	1218 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);

	1219 _mov(T_1, Src1Lo);

1220 _mov(T_2, Src0Lo);	1220 _mov(T_2, Src0Lo);

1221 _mov(T_3, Src0Hi);	1221 _mov(T_3, Src0Hi);

1222 switch (Op) {	1222 switch (Op) {

1223 default:	1223 default:

1224 assert(0 && "non-shift op");	1224 assert(0 && "non-shift op");

1225 break;	1225 break;

1226 case InstArithmetic::Shl: {	1226 case InstArithmetic::Shl: {

1227 // a=b<<c ==>	1227 // a=b<<c ==>

1228 // t3 = shld t3, t2, t1	1228 // t3 = shld t3, t2, t1

1229 // t2 = shl t2, t1	1229 // t2 = shl t2, t1

(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1317 }	1317 }

1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {	1318 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {

1319 // These x86-32 helper-call-involved instructions are lowered in this	1319 // These x86-32 helper-call-involved instructions are lowered in this

1320 // separate switch. This is because loOperand() and hiOperand() may insert	1320 // separate switch. This is because loOperand() and hiOperand() may insert

1321 // redundant instructions for constant blinding and pooling. Such redundant	1321 // redundant instructions for constant blinding and pooling. Such redundant

1322 // instructions will fail liveness analysis under -Om1 setting. And,	1322 // instructions will fail liveness analysis under -Om1 setting. And,

1323 // actually these arguments do not need to be processed with loOperand()	1323 // actually these arguments do not need to be processed with loOperand()

1324 // and hiOperand() to be used.	1324 // and hiOperand() to be used.

1325 switch (Inst->getOp()) {	1325 switch (Inst->getOp()) {

1326 case InstArithmetic::Udiv: {	1326 case InstArithmetic::Udiv: {

1327 const SizeT MaxSrcs = 2;	1327 constexpr SizeT MaxSrcs = 2;

1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);	1328 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);

1329 Call->addArg(Inst->getSrc(0));	1329 Call->addArg(Inst->getSrc(0));

1330 Call->addArg(Inst->getSrc(1));	1330 Call->addArg(Inst->getSrc(1));

1331 lowerCall(Call);	1331 lowerCall(Call);

1332 return;	1332 return;

1333 }	1333 }

1334 case InstArithmetic::Sdiv: {	1334 case InstArithmetic::Sdiv: {

1335 const SizeT MaxSrcs = 2;	1335 constexpr SizeT MaxSrcs = 2;

1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);	1336 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);

1337 Call->addArg(Inst->getSrc(0));	1337 Call->addArg(Inst->getSrc(0));

1338 Call->addArg(Inst->getSrc(1));	1338 Call->addArg(Inst->getSrc(1));

1339 lowerCall(Call);	1339 lowerCall(Call);

1340 return;	1340 return;

1341 }	1341 }

1342 case InstArithmetic::Urem: {	1342 case InstArithmetic::Urem: {

1343 const SizeT MaxSrcs = 2;	1343 constexpr SizeT MaxSrcs = 2;

1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);	1344 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);

1345 Call->addArg(Inst->getSrc(0));	1345 Call->addArg(Inst->getSrc(0));

1346 Call->addArg(Inst->getSrc(1));	1346 Call->addArg(Inst->getSrc(1));

1347 lowerCall(Call);	1347 lowerCall(Call);

1348 return;	1348 return;

1349 }	1349 }

1350 case InstArithmetic::Srem: {	1350 case InstArithmetic::Srem: {

1351 const SizeT MaxSrcs = 2;	1351 constexpr SizeT MaxSrcs = 2;

1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);	1352 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);

1353 Call->addArg(Inst->getSrc(0));	1353 Call->addArg(Inst->getSrc(0));

1354 Call->addArg(Inst->getSrc(1));	1354 Call->addArg(Inst->getSrc(1));

1355 lowerCall(Call);	1355 lowerCall(Call);

1356 return;	1356 return;

1357 }	1357 }

1358 default:	1358 default:

1359 break;	1359 break;

1360 }	1360 }

1361	1361

(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1522 // pmuludq T1, Src1	1522 // pmuludq T1, Src1

1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}	1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}

1524 // pmuludq T2, T3	1524 // pmuludq T2, T3

1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}	1525 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}

1526 // shufps T1, T2, {0,2,0,2}	1526 // shufps T1, T2, {0,2,0,2}

1527 // pshufd T4, T1, {0,2,1,3}	1527 // pshufd T4, T1, {0,2,1,3}

1528 // movups Dest, T4	1528 // movups Dest, T4

1529	1529

1530 // Mask that directs pshufd to create a vector with entries	1530 // Mask that directs pshufd to create a vector with entries

1531 // Src[1, 0, 3, 0]	1531 // Src[1, 0, 3, 0]

1532 const unsigned Constant1030 = 0x31;	1532 constexpr unsigned Constant1030 = 0x31;

1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);	1533 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);

1534 // Mask that directs shufps to create a vector with entries	1534 // Mask that directs shufps to create a vector with entries

1535 // Dest[0, 2], Src[0, 2]	1535 // Dest[0, 2], Src[0, 2]

1536 const unsigned Mask0202 = 0x88;	1536 constexpr unsigned Mask0202 = 0x88;

1537 // Mask that directs pshufd to create a vector with entries	1537 // Mask that directs pshufd to create a vector with entries

1538 // Src[0, 2, 1, 3]	1538 // Src[0, 2, 1, 3]

1539 const unsigned Mask0213 = 0xd8;	1539 constexpr unsigned Mask0213 = 0xd8;

1540 Variable *T1 = makeReg(IceType_v4i32);	1540 Variable *T1 = makeReg(IceType_v4i32);

1541 Variable *T2 = makeReg(IceType_v4i32);	1541 Variable *T2 = makeReg(IceType_v4i32);

1542 Variable *T3 = makeReg(IceType_v4i32);	1542 Variable *T3 = makeReg(IceType_v4i32);

1543 Variable *T4 = makeReg(IceType_v4i32);	1543 Variable *T4 = makeReg(IceType_v4i32);

1544 _movp(T1, Src0);	1544 _movp(T1, Src0);

1545 _pshufd(T2, Src0, Mask1030);	1545 _pshufd(T2, Src0, Mask1030);

1546 _pshufd(T3, Src1, Mask1030);	1546 _pshufd(T3, Src1, Mask1030);

1547 _pmuludq(T1, Src1);	1547 _pmuludq(T1, Src1);

1548 _pmuludq(T2, T3);	1548 _pmuludq(T2, T3);

1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));	1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));

(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1624 _mov(T, Src0);	1624 _mov(T, Src0);

1625 _sub(T, Src1);	1625 _sub(T, Src1);

1626 _mov(Dest, T);	1626 _mov(Dest, T);

1627 break;	1627 break;

1628 case InstArithmetic::Mul:	1628 case InstArithmetic::Mul:

1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {	1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

1630 if (optimizeScalarMul(Dest, Src0, C->getValue()))	1630 if (optimizeScalarMul(Dest, Src0, C->getValue()))

1631 return;	1631 return;

1632 }	1632 }

1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must	1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must

1634 // be in eax.	1634 // be in al.

1635 if (isByteSizedArithType(Dest->getType())) {	1635 if (isByteSizedArithType(Dest->getType())) {

1636 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1636 _mov(T, Src0, Traits::RegisterSet::Reg_al);

1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1638 _imul(T, Src0 == Src1 ? T : Src1);	1638 _imul(T, Src0 == Src1 ? T : Src1);

1639 _mov(Dest, T);	1639 _mov(Dest, T);

1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {	1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {

1641 T = makeReg(Dest->getType());	1641 T = makeReg(Dest->getType());

1642 _imul_imm(T, Src0, ImmConst);	1642 _imul_imm(T, Src0, ImmConst);

1643 _mov(Dest, T);	1643 _mov(Dest, T);

1644 } else {	1644 } else {

1645 _mov(T, Src0);	1645 _mov(T, Src0);

1646 _imul(T, Src0 == Src1 ? T : Src1);	1646 _imul(T, Src0 == Src1 ? T : Src1);

1647 _mov(Dest, T);	1647 _mov(Dest, T);

1648 }	1648 }

1649 break;	1649 break;

1650 case InstArithmetic::Shl:	1650 case InstArithmetic::Shl:

1651 _mov(T, Src0);	1651 _mov(T, Src0);

1652 if (!llvm::isa<ConstantInteger32>(Src1))	1652 if (!llvm::isa<ConstantInteger32>(Src1)) {

1653 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1653 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);

	1654 _mov(Cl, Src1);

	1655 Src1 = Cl;

	1656 }

1654 _shl(T, Src1);	1657 _shl(T, Src1);

1655 _mov(Dest, T);	1658 _mov(Dest, T);

1656 break;	1659 break;

1657 case InstArithmetic::Lshr:	1660 case InstArithmetic::Lshr:

1658 _mov(T, Src0);	1661 _mov(T, Src0);

1659 if (!llvm::isa<ConstantInteger32>(Src1))	1662 if (!llvm::isa<ConstantInteger32>(Src1)) {

1660 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1663 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);

	1664 _mov(Cl, Src1);

	1665 Src1 = Cl;

	1666 }

1661 _shr(T, Src1);	1667 _shr(T, Src1);

1662 _mov(Dest, T);	1668 _mov(Dest, T);

1663 break;	1669 break;

1664 case InstArithmetic::Ashr:	1670 case InstArithmetic::Ashr:

1665 _mov(T, Src0);	1671 _mov(T, Src0);

1666 if (!llvm::isa<ConstantInteger32>(Src1))	1672 if (!llvm::isa<ConstantInteger32>(Src1)) {

1667 Src1 = legalizeToReg(Src1, Traits::RegisterSet::Reg_ecx);	1673 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl);

	1674 _mov(Cl, Src1);

	1675 Src1 = Cl;

	1676 }

1668 _sar(T, Src1);	1677 _sar(T, Src1);

1669 _mov(Dest, T);	1678 _mov(Dest, T);

1670 break;	1679 break;

1671 case InstArithmetic::Udiv:	1680 case InstArithmetic::Udiv:

1672 // div and idiv are the few arithmetic operators that do not allow	1681 // div and idiv are the few arithmetic operators that do not allow

1673 // immediates as the operand.	1682 // immediates as the operand.

1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1683 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1675 if (isByteSizedArithType(Dest->getType())) {	1684 if (isByteSizedArithType(Dest->getType())) {

1676 // For 8-bit unsigned division we need to zero-extend al into ah. A mov	1685 // For 8-bit unsigned division we need to zero-extend al into ah. A mov

1677 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64	1686 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64

1678 // assembler refuses to encode %ah (encoding %spl with a REX prefix	1687 // assembler refuses to encode %ah (encoding %spl with a REX prefix

1679 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah	1688 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah

1680 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and	1689 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and

1681 // %d[lh], which means the X86 target lowering (and the register	1690 // %d[lh], which means the X86 target lowering (and the register

1682 // allocator) would have to be aware of this restriction. For now, we	1691 // allocator) would have to be aware of this restriction. For now, we

1683 // simply zero %eax completely, and move the dividend into %al.	1692 // simply zero %eax completely, and move the dividend into %al.

1684 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);	1693 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1685 Context.insert(InstFakeDef::create(Func, T_eax));	1694 Context.insert(InstFakeDef::create(Func, T_eax));

1686 _xor(T_eax, T_eax);	1695 _xor(T_eax, T_eax);

1687 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1696 _mov(T, Src0, Traits::RegisterSet::Reg_al);

1688 _div(T, Src1, T);	1697 _div(T, Src1, T);

1689 _mov(Dest, T);	1698 _mov(Dest, T);

1690 Context.insert(InstFakeUse::create(Func, T_eax));	1699 Context.insert(InstFakeUse::create(Func, T_eax));

1691 } else {	1700 } else {

1692 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1701 Type Ty = Dest->getType();

1693 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1702 uint32_t Eax = Traits::RegisterSet::Reg_eax;

1694 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);	1703 uint32_t Edx = Traits::RegisterSet::Reg_edx;

	1704 switch (Ty) {

	1705 default:

	1706 llvm_unreachable("Bad type for udiv");

	1707 // fallthrough

	1708 case IceType_i32:

	1709 break;

	1710 case IceType_i16:

	1711 Eax = Traits::RegisterSet::Reg_ax;

	1712 Edx = Traits::RegisterSet::Reg_dx;

	1713 break;

	1714 }

	1715 Constant *Zero = Ctx->getConstantZero(Ty);

	1716 _mov(T, Src0, Eax);

	1717 _mov(T_edx, Zero, Edx);

1695 _div(T, Src1, T_edx);	1718 _div(T, Src1, T_edx);

1696 _mov(Dest, T);	1719 _mov(Dest, T);

1697 }	1720 }

1698 break;	1721 break;

1699 case InstArithmetic::Sdiv:	1722 case InstArithmetic::Sdiv:

1700 // TODO(stichnot): Enable this after doing better performance and cross	1723 // TODO(stichnot): Enable this after doing better performance and cross

1701 // testing.	1724 // testing.

1702 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {	1725 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

1703 // Optimize division by constant power of 2, but not for Om1 or O0, just	1726 // Optimize division by constant power of 2, but not for Om1 or O0, just

1704 // to keep things simple there.	1727 // to keep things simple there.

(...skipping 21 matching lines...) Expand all Loading...
1726 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));	1749 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

1727 _add(T, Src0);	1750 _add(T, Src0);

1728 _sar(T, Ctx->getConstantInt(Ty, LogDiv));	1751 _sar(T, Ctx->getConstantInt(Ty, LogDiv));

1729 }	1752 }

1730 _mov(Dest, T);	1753 _mov(Dest, T);

1731 return;	1754 return;

1732 }	1755 }

1733 }	1756 }

1734 }	1757 }

1735 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1736 if (isByteSizedArithType(Dest->getType())) {	1759 switch (Type Ty = Dest->getType()) {

	1760 default:

	1761 llvm_unreachable("Bad type for sdiv");

	1762 // fallthrough

	1763 case IceType_i32:

	1764 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);

1737 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1765 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1738 _cbwdq(T, T);	1766 break;

1739 _idiv(T, Src1, T);	1767 case IceType_i16:

1740 _mov(Dest, T);	1768 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);

1741 } else {	1769 _mov(T, Src0, Traits::RegisterSet::Reg_ax);

1742 T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	1770 break;

1743 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1771 case IceType_i8:

1744 _cbwdq(T_edx, T);	1772 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);

1745 _idiv(T, Src1, T_edx);	1773 _mov(T, Src0, Traits::RegisterSet::Reg_al);

1746 _mov(Dest, T);	1774 break;

1747 }	1775 }

	1776 _cbwdq(T_edx, T);

	1777 _idiv(T, Src1, T_edx);

	1778 _mov(Dest, T);

1748 break;	1779 break;

1749 case InstArithmetic::Urem:	1780 case InstArithmetic::Urem:

1750 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1781 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1751 if (isByteSizedArithType(Dest->getType())) {	1782 if (isByteSizedArithType(Dest->getType())) {

1752 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);	1783 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

1753 Context.insert(InstFakeDef::create(Func, T_eax));	1784 Context.insert(InstFakeDef::create(Func, T_eax));

1754 _xor(T_eax, T_eax);	1785 _xor(T_eax, T_eax);

1755 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1786 _mov(T, Src0, Traits::RegisterSet::Reg_al);

1756 _div(T, Src1, T);	1787 _div(T, Src1, T);

1757 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't	1788 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

1758 // mov %ah, %al because it would make x86-64 codegen more complicated. If	1789 // mov %ah, %al because it would make x86-64 codegen more complicated. If

1759 // this ever becomes a problem we can introduce a pseudo rem instruction	1790 // this ever becomes a problem we can introduce a pseudo rem instruction

1760 // that returns the remainder in %al directly (and uses a mov for copying	1791 // that returns the remainder in %al directly (and uses a mov for copying

1761 // %ah to %al.)	1792 // %ah to %al.)

1762 static constexpr uint8_t AlSizeInBits = 8;	1793 static constexpr uint8_t AlSizeInBits = 8;

1763 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));	1794 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));

1764 _mov(Dest, T);	1795 _mov(Dest, T);

1765 Context.insert(InstFakeUse::create(Func, T_eax));	1796 Context.insert(InstFakeUse::create(Func, T_eax));

1766 } else {	1797 } else {

1767 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1798 Type Ty = Dest->getType();

1768 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);	1799 uint32_t Eax = Traits::RegisterSet::Reg_eax;

	1800 uint32_t Edx = Traits::RegisterSet::Reg_edx;

	1801 switch (Ty) {

	1802 default:

	1803 llvm_unreachable("Bad type for urem");

	1804 // fallthrough

	1805 case IceType_i32:

	1806 break;

	1807 case IceType_i16:

	1808 Eax = Traits::RegisterSet::Reg_ax;

	1809 Edx = Traits::RegisterSet::Reg_dx;

	1810 break;

	1811 }

	1812 Constant *Zero = Ctx->getConstantZero(Ty);

	1813 T_edx = makeReg(Dest->getType(), Edx);

1769 _mov(T_edx, Zero);	1814 _mov(T_edx, Zero);

1770 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1815 _mov(T, Src0, Eax);

1771 _div(T_edx, Src1, T);	1816 _div(T_edx, Src1, T);

1772 _mov(Dest, T_edx);	1817 _mov(Dest, T_edx);

1773 }	1818 }

1774 break;	1819 break;

1775 case InstArithmetic::Srem:	1820 case InstArithmetic::Srem:

1776 // TODO(stichnot): Enable this after doing better performance and cross	1821 // TODO(stichnot): Enable this after doing better performance and cross

1777 // testing.	1822 // testing.

1778 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {	1823 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

1779 // Optimize mod by constant power of 2, but not for Om1 or O0, just to	1824 // Optimize mod by constant power of 2, but not for Om1 or O0, just to

1780 // keep things simple there.	1825 // keep things simple there.

(...skipping 26 matching lines...) Expand all Loading...
1807 _add(T, Src0);	1852 _add(T, Src0);

1808 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));	1853 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

1809 _sub(T, Src0);	1854 _sub(T, Src0);

1810 _neg(T);	1855 _neg(T);

1811 _mov(Dest, T);	1856 _mov(Dest, T);

1812 return;	1857 return;

1813 }	1858 }

1814 }	1859 }

1815 }	1860 }

1816 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1861 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1817 if (isByteSizedArithType(Dest->getType())) {	1862 switch (Type Ty = Dest->getType()) {

1818 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1863 default:

1819 // T is %al.	1864 llvm_unreachable("Bad type for srem");

1820 _cbwdq(T, T);	1865 // fallthrough

1821 _idiv(T, Src1, T);	1866 case IceType_i32:

1822 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);	1867 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);

1823 Context.insert(InstFakeDef::create(Func, T_eax));

1824 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't

1825 // mov %ah, %al because it would make x86-64 codegen more complicated. If

1826 // this ever becomes a problem we can introduce a pseudo rem instruction

1827 // that returns the remainder in %al directly (and uses a mov for copying

1828 // %ah to %al.)

1829 static constexpr uint8_t AlSizeInBits = 8;

1830 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits));

1831 _mov(Dest, T);

1832 Context.insert(InstFakeUse::create(Func, T_eax));

1833 } else {

1834 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);

1835 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1868 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1836 _cbwdq(T_edx, T);	1869 _cbwdq(T_edx, T);

1837 _idiv(T_edx, Src1, T);	1870 _idiv(T_edx, Src1, T);

1838 _mov(Dest, T_edx);	1871 _mov(Dest, T_edx);

	1872 break;

	1873 case IceType_i16:

	1874 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);

	1875 _mov(T, Src0, Traits::RegisterSet::Reg_ax);

	1876 _cbwdq(T_edx, T);

	1877 _idiv(T_edx, Src1, T);

	1878 _mov(Dest, T_edx);

	1879 break;

	1880 case IceType_i8:

	1881 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);

	1882 // TODO(stichnot): Use register ah for T_edx, and remove the _shr().

	1883 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);

	1884 _mov(T, Src0, Traits::RegisterSet::Reg_al);

	1885 _cbwdq(T_edx, T);

	1886 _idiv(T_edx, Src1, T);

	1887 static constexpr uint8_t AlSizeInBits = 8;

	1888 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));

	1889 _mov(Dest, T_edx);

	1890 break;

1839 }	1891 }

1840 break;	1892 break;

1841 case InstArithmetic::Fadd:	1893 case InstArithmetic::Fadd:

1842 _mov(T, Src0);	1894 _mov(T, Src0);

1843 _addss(T, Src1);	1895 _addss(T, Src1);

1844 _mov(Dest, T);	1896 _mov(Dest, T);

1845 break;	1897 break;

1846 case InstArithmetic::Fsub:	1898 case InstArithmetic::Fsub:

1847 _mov(T, Src0);	1899 _mov(T, Src0);

1848 _subss(T, Src1);	1900 _subss(T, Src1);

1849 _mov(Dest, T);	1901 _mov(Dest, T);

1850 break;	1902 break;

1851 case InstArithmetic::Fmul:	1903 case InstArithmetic::Fmul:

1852 _mov(T, Src0);	1904 _mov(T, Src0);

1853 _mulss(T, Src0 == Src1 ? T : Src1);	1905 _mulss(T, Src0 == Src1 ? T : Src1);

1854 _mov(Dest, T);	1906 _mov(Dest, T);

1855 break;	1907 break;

1856 case InstArithmetic::Fdiv:	1908 case InstArithmetic::Fdiv:

1857 _mov(T, Src0);	1909 _mov(T, Src0);

1858 _divss(T, Src1);	1910 _divss(T, Src1);

1859 _mov(Dest, T);	1911 _mov(Dest, T);

1860 break;	1912 break;

1861 case InstArithmetic::Frem: {	1913 case InstArithmetic::Frem: {

1862 const SizeT MaxSrcs = 2;	1914 constexpr SizeT MaxSrcs = 2;

1863 Type Ty = Dest->getType();	1915 Type Ty = Dest->getType();

1864 InstCall *Call = makeHelperCall(	1916 InstCall *Call = makeHelperCall(

1865 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);	1917 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);

1866 Call->addArg(Src0);	1918 Call->addArg(Src0);

1867 Call->addArg(Src1);	1919 Call->addArg(Src1);

1868 return lowerCall(Call);	1920 return lowerCall(Call);

1869 }	1921 }

1870 }	1922 }

1871 }	1923 }

1872	1924

(...skipping 234 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2107 if (isVectorType(Dest->getType())) {	2159 if (isVectorType(Dest->getType())) {

2108 assert(Dest->getType() == IceType_v4i32 &&	2160 assert(Dest->getType() == IceType_v4i32 &&

2109 Inst->getSrc(0)->getType() == IceType_v4f32);	2161 Inst->getSrc(0)->getType() == IceType_v4f32);

2110 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2162 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2111 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))	2163 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))

2112 Src0RM = legalizeToReg(Src0RM);	2164 Src0RM = legalizeToReg(Src0RM);

2113 Variable *T = makeReg(Dest->getType());	2165 Variable *T = makeReg(Dest->getType());

2114 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);	2166 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);

2115 _movp(Dest, T);	2167 _movp(Dest, T);

2116 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {	2168 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {

2117 const SizeT MaxSrcs = 1;	2169 constexpr SizeT MaxSrcs = 1;

2118 Type SrcType = Inst->getSrc(0)->getType();	2170 Type SrcType = Inst->getSrc(0)->getType();

2119 InstCall *Call =	2171 InstCall *Call =

2120 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64	2172 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64

2121 : H_fptosi_f64_i64,	2173 : H_fptosi_f64_i64,

2122 Dest, MaxSrcs);	2174 Dest, MaxSrcs);

2123 Call->addArg(Inst->getSrc(0));	2175 Call->addArg(Inst->getSrc(0));

2124 lowerCall(Call);	2176 lowerCall(Call);

2125 } else {	2177 } else {

2126 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2178 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2127 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type	2179 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

(...skipping 10 matching lines...) Expand all Loading...
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types	2190 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

2139 if (Dest->getType() == IceType_i1)	2191 if (Dest->getType() == IceType_i1)

2140 _and(T_2, Ctx->getConstantInt1(1));	2192 _and(T_2, Ctx->getConstantInt1(1));

2141 _mov(Dest, T_2);	2193 _mov(Dest, T_2);

2142 }	2194 }

2143 break;	2195 break;

2144 case InstCast::Fptoui:	2196 case InstCast::Fptoui:

2145 if (isVectorType(Dest->getType())) {	2197 if (isVectorType(Dest->getType())) {

2146 assert(Dest->getType() == IceType_v4i32 &&	2198 assert(Dest->getType() == IceType_v4i32 &&

2147 Inst->getSrc(0)->getType() == IceType_v4f32);	2199 Inst->getSrc(0)->getType() == IceType_v4f32);

2148 const SizeT MaxSrcs = 1;	2200 constexpr SizeT MaxSrcs = 1;

2149 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);	2201 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);

2150 Call->addArg(Inst->getSrc(0));	2202 Call->addArg(Inst->getSrc(0));

2151 lowerCall(Call);	2203 lowerCall(Call);

2152 } else if (Dest->getType() == IceType_i64 \|\|	2204 } else if (Dest->getType() == IceType_i64 \|\|

2153 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {	2205 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {

2154 // Use a helper for both x86-32 and x86-64.	2206 // Use a helper for both x86-32 and x86-64.

2155 const SizeT MaxSrcs = 1;	2207 constexpr SizeT MaxSrcs = 1;

2156 Type DestType = Dest->getType();	2208 Type DestType = Dest->getType();

2157 Type SrcType = Inst->getSrc(0)->getType();	2209 Type SrcType = Inst->getSrc(0)->getType();

2158 IceString TargetString;	2210 IceString TargetString;

2159 if (Traits::Is64Bit) {	2211 if (Traits::Is64Bit) {

2160 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64	2212 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64

2161 : H_fptoui_f64_i64;	2213 : H_fptoui_f64_i64;

2162 } else if (isInt32Asserting32Or64(DestType)) {	2214 } else if (isInt32Asserting32Or64(DestType)) {

2163 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32	2215 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32

2164 : H_fptoui_f64_i32;	2216 : H_fptoui_f64_i32;

2165 } else {	2217 } else {

(...skipping 28 matching lines...) Expand all Loading...
2194 assert(Dest->getType() == IceType_v4f32 &&	2246 assert(Dest->getType() == IceType_v4f32 &&

2195 Inst->getSrc(0)->getType() == IceType_v4i32);	2247 Inst->getSrc(0)->getType() == IceType_v4i32);

2196 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2248 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2197 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))	2249 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))

2198 Src0RM = legalizeToReg(Src0RM);	2250 Src0RM = legalizeToReg(Src0RM);

2199 Variable *T = makeReg(Dest->getType());	2251 Variable *T = makeReg(Dest->getType());

2200 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);	2252 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);

2201 _movp(Dest, T);	2253 _movp(Dest, T);

2202 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {	2254 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {

2203 // Use a helper for x86-32.	2255 // Use a helper for x86-32.

2204 const SizeT MaxSrcs = 1;	2256 constexpr SizeT MaxSrcs = 1;

2205 Type DestType = Dest->getType();	2257 Type DestType = Dest->getType();

2206 InstCall *Call =	2258 InstCall *Call =

2207 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32	2259 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32

2208 : H_sitofp_i64_f64,	2260 : H_sitofp_i64_f64,

2209 Dest, MaxSrcs);	2261 Dest, MaxSrcs);

2210 // TODO: Call the correct compiler-rt helper function.	2262 // TODO: Call the correct compiler-rt helper function.

2211 Call->addArg(Inst->getSrc(0));	2263 Call->addArg(Inst->getSrc(0));

2212 lowerCall(Call);	2264 lowerCall(Call);

2213 return;	2265 return;

2214 } else {	2266 } else {

(...skipping 14 matching lines...) Expand all Loading...
2229 _movsx(T_1, Src0RM);	2281 _movsx(T_1, Src0RM);

2230 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);	2282 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);

2231 _mov(Dest, T_2);	2283 _mov(Dest, T_2);

2232 }	2284 }

2233 break;	2285 break;

2234 case InstCast::Uitofp: {	2286 case InstCast::Uitofp: {

2235 Operand *Src0 = Inst->getSrc(0);	2287 Operand *Src0 = Inst->getSrc(0);

2236 if (isVectorType(Src0->getType())) {	2288 if (isVectorType(Src0->getType())) {

2237 assert(Dest->getType() == IceType_v4f32 &&	2289 assert(Dest->getType() == IceType_v4f32 &&

2238 Src0->getType() == IceType_v4i32);	2290 Src0->getType() == IceType_v4i32);

2239 const SizeT MaxSrcs = 1;	2291 constexpr SizeT MaxSrcs = 1;

2240 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);	2292 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);

2241 Call->addArg(Src0);	2293 Call->addArg(Src0);

2242 lowerCall(Call);	2294 lowerCall(Call);

2243 } else if (Src0->getType() == IceType_i64 \|\|	2295 } else if (Src0->getType() == IceType_i64 \|\|

2244 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {	2296 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {

2245 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on	2297 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on

2246 // x86-32.	2298 // x86-32.

2247 const SizeT MaxSrcs = 1;	2299 constexpr SizeT MaxSrcs = 1;

2248 Type DestType = Dest->getType();	2300 Type DestType = Dest->getType();

2249 IceString TargetString;	2301 IceString TargetString;

2250 if (isInt32Asserting32Or64(Src0->getType())) {	2302 if (isInt32Asserting32Or64(Src0->getType())) {

2251 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32	2303 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32

2252 : H_uitofp_i32_f64;	2304 : H_uitofp_i32_f64;

2253 } else {	2305 } else {

2254 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32	2306 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32

2255 : H_uitofp_i64_f64;	2307 : H_uitofp_i64_f64;

2256 }	2308 }

2257 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);	2309 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

(...skipping 195 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2453 Operand *SourceVectNotLegalized = Inst->getSrc(0);	2505 Operand *SourceVectNotLegalized = Inst->getSrc(0);

2454 ConstantInteger32 *ElementIndex =	2506 ConstantInteger32 *ElementIndex =

2455 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));	2507 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));

2456 // Only constant indices are allowed in PNaCl IR.	2508 // Only constant indices are allowed in PNaCl IR.

2457 assert(ElementIndex);	2509 assert(ElementIndex);

2458	2510

2459 unsigned Index = ElementIndex->getValue();	2511 unsigned Index = ElementIndex->getValue();

2460 Type Ty = SourceVectNotLegalized->getType();	2512 Type Ty = SourceVectNotLegalized->getType();

2461 Type ElementTy = typeElementType(Ty);	2513 Type ElementTy = typeElementType(Ty);

2462 Type InVectorElementTy = Traits::getInVectorElementType(Ty);	2514 Type InVectorElementTy = Traits::getInVectorElementType(Ty);

2463 Variable *ExtractedElementR = makeReg(InVectorElementTy);

2464	2515

2465 // TODO(wala): Determine the best lowering sequences for each type.	2516 // TODO(wala): Determine the best lowering sequences for each type.

2466 bool CanUsePextr = Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\|	2517 bool CanUsePextr = Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\|

2467 InstructionSet >= Traits::SSE4_1;	2518 (InstructionSet >= Traits::SSE4_1 && Ty != IceType_v4f32);

2468 if (CanUsePextr && Ty != IceType_v4f32) {	2519 Variable *ExtractedElementR =

2469 // Use pextrb, pextrw, or pextrd.	2520 makeReg(CanUsePextr ? IceType_i32 : InVectorElementTy);

	2521 if (CanUsePextr) {

	2522 // Use pextrb, pextrw, or pextrd. The "b" and "w" versions clear the upper

	2523 // bits of the destination register, so we represent this by always

	2524 // extracting into an i32 register. The _mov into Dest below will do

	2525 // truncation as necessary.

2470 Constant *Mask = Ctx->getConstantInt32(Index);	2526 Constant *Mask = Ctx->getConstantInt32(Index);

2471 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);	2527 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);

2472 _pextr(ExtractedElementR, SourceVectR, Mask);	2528 _pextr(ExtractedElementR, SourceVectR, Mask);

2473 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {	2529 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {

2474 // Use pshufd and movd/movss.	2530 // Use pshufd and movd/movss.

2475 Variable *T = nullptr;	2531 Variable *T = nullptr;

2476 if (Index) {	2532 if (Index) {

2477 // The shuffle only needs to occur if the element to be extracted is not	2533 // The shuffle only needs to occur if the element to be extracted is not

2478 // at the lowest index.	2534 // at the lowest index.

2479 Constant *Mask = Ctx->getConstantInt32(Index);	2535 Constant *Mask = Ctx->getConstantInt32(Index);

(...skipping 496 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2976 // Use insertps, pinsrb, pinsrw, or pinsrd.	3032 // Use insertps, pinsrb, pinsrw, or pinsrd.

2977 Operand *ElementRM =	3033 Operand *ElementRM =

2978 legalize(ElementToInsertNotLegalized, Legal_Reg \| Legal_Mem);	3034 legalize(ElementToInsertNotLegalized, Legal_Reg \| Legal_Mem);

2979 Operand *SourceVectRM =	3035 Operand *SourceVectRM =

2980 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);	3036 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);

2981 Variable *T = makeReg(Ty);	3037 Variable *T = makeReg(Ty);

2982 _movp(T, SourceVectRM);	3038 _movp(T, SourceVectRM);

2983 if (Ty == IceType_v4f32)	3039 if (Ty == IceType_v4f32)

2984 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));	3040 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));

2985 else	3041 else

	3042 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source

	3043 // operand is a register, it must be a full r32 register like eax, and not

	3044 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates

	3045 // for the use of r16 and r8 by converting them through getBaseReg(),

	3046 // while emitIAS() validates that the original and base register encodings

	3047 // are the same. But for an "interior" register like ah, it should

	3048 // probably be copied into an r32 via movzx so that the types work out.

2986 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));	3049 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));

2987 _movp(Inst->getDest(), T);	3050 _movp(Inst->getDest(), T);

2988 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {	3051 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {

2989 // Use shufps or movss.	3052 // Use shufps or movss.

2990 Variable *ElementR = nullptr;	3053 Variable *ElementR = nullptr;

2991 Operand *SourceVectRM =	3054 Operand *SourceVectRM =

2992 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);	3055 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);

2993	3056

2994 if (InVectorElementTy == IceType_f32) {	3057 if (InVectorElementTy == IceType_f32) {

2995 // ElementR will be in an XMM register since it is floating point.	3058 // ElementR will be in an XMM register since it is floating point.

(...skipping 314 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3310 // well-defined value.	3373 // well-defined value.

3311 Operand *Val = legalize(Instr->getArg(0));	3374 Operand *Val = legalize(Instr->getArg(0));

3312 Operand *FirstVal;	3375 Operand *FirstVal;

3313 Operand *SecondVal = nullptr;	3376 Operand *SecondVal = nullptr;

3314 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {	3377 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {

3315 FirstVal = loOperand(Val);	3378 FirstVal = loOperand(Val);

3316 SecondVal = hiOperand(Val);	3379 SecondVal = hiOperand(Val);

3317 } else {	3380 } else {

3318 FirstVal = Val;	3381 FirstVal = Val;

3319 }	3382 }

3320 const bool IsCttz = false;	3383 constexpr bool IsCttz = false;

3321 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,	3384 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,

3322 SecondVal);	3385 SecondVal);

3323 return;	3386 return;

3324 }	3387 }

3325 case Intrinsics::Cttz: {	3388 case Intrinsics::Cttz: {

3326 // The "is zero undef" parameter is ignored and we always return a	3389 // The "is zero undef" parameter is ignored and we always return a

3327 // well-defined value.	3390 // well-defined value.

3328 Operand *Val = legalize(Instr->getArg(0));	3391 Operand *Val = legalize(Instr->getArg(0));

3329 Operand *FirstVal;	3392 Operand *FirstVal;

3330 Operand *SecondVal = nullptr;	3393 Operand *SecondVal = nullptr;

3331 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {	3394 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {

3332 FirstVal = hiOperand(Val);	3395 FirstVal = hiOperand(Val);

3333 SecondVal = loOperand(Val);	3396 SecondVal = loOperand(Val);

3334 } else {	3397 } else {

3335 FirstVal = Val;	3398 FirstVal = Val;

3336 }	3399 }

3337 const bool IsCttz = true;	3400 constexpr bool IsCttz = true;

3338 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,	3401 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,

3339 SecondVal);	3402 SecondVal);

3340 return;	3403 return;

3341 }	3404 }

3342 case Intrinsics::Fabs: {	3405 case Intrinsics::Fabs: {

3343 Operand *Src = legalize(Instr->getArg(0));	3406 Operand *Src = legalize(Instr->getArg(0));

3344 Type Ty = Src->getType();	3407 Type Ty = Src->getType();

3345 Variable *Dest = Instr->getDest();	3408 Variable *Dest = Instr->getDest();

3346 Variable *T = makeVectorOfFabsMask(Ty);	3409 Variable *T = makeVectorOfFabsMask(Ty);

3347 // The pand instruction operates on an m128 memory operand, so if Src is an	3410 // The pand instruction operates on an m128 memory operand, so if Src is an

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3425 Func->setError("Should not be lowering UnknownIntrinsic");	3488 Func->setError("Should not be lowering UnknownIntrinsic");

3426 return;	3489 return;

3427 }	3490 }

3428 return;	3491 return;

3429 }	3492 }

3430	3493

3431 template <class Machine>	3494 template <class Machine>

3432 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,	3495 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,

3433 Operand *Ptr, Operand *Expected,	3496 Operand *Ptr, Operand *Expected,

3434 Operand *Desired) {	3497 Operand *Desired) {

3435 if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {	3498 Type Ty = Expected->getType();

	3499 if (!Traits::Is64Bit && Ty == IceType_i64) {

3436 // Reserve the pre-colored registers first, before adding any more	3500 // Reserve the pre-colored registers first, before adding any more

3437 // infinite-weight variables from formMemoryOperand's legalization.	3501 // infinite-weight variables from formMemoryOperand's legalization.

3438 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);	3502 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);

3439 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);	3503 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);

3440 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);	3504 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);

3441 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);	3505 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);

3442 _mov(T_eax, loOperand(Expected));	3506 _mov(T_eax, loOperand(Expected));

3443 _mov(T_edx, hiOperand(Expected));	3507 _mov(T_edx, hiOperand(Expected));

3444 _mov(T_ebx, loOperand(Desired));	3508 _mov(T_ebx, loOperand(Desired));

3445 _mov(T_ecx, hiOperand(Desired));	3509 _mov(T_ecx, hiOperand(Desired));

3446 typename Traits::X86OperandMem *Addr =	3510 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);

3447 formMemoryOperand(Ptr, Expected->getType());	3511 constexpr bool Locked = true;

3448 const bool Locked = true;

3449 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3512 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3450 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));	3513 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));

3451 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));	3514 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));

3452 _mov(DestLo, T_eax);	3515 _mov(DestLo, T_eax);

3453 _mov(DestHi, T_edx);	3516 _mov(DestHi, T_edx);

3454 return;	3517 return;

3455 }	3518 }

3456 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);	3519 int32_t Eax;

	3520 switch (Ty) {

	3521 default:

	3522 llvm_unreachable("Bad type for cmpxchg");

	3523 // fallthrough

	3524 case IceType_i32:

	3525 Eax = Traits::RegisterSet::Reg_eax;

	3526 break;

	3527 case IceType_i16:

	3528 Eax = Traits::RegisterSet::Reg_ax;

	3529 break;

	3530 case IceType_i8:

	3531 Eax = Traits::RegisterSet::Reg_al;

	3532 break;

	3533 }

	3534 Variable *T_eax = makeReg(Ty, Eax);

3457 _mov(T_eax, Expected);	3535 _mov(T_eax, Expected);

3458 typename Traits::X86OperandMem *Addr =	3536 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);

3459 formMemoryOperand(Ptr, Expected->getType());

3460 Variable *DesiredReg = legalizeToReg(Desired);	3537 Variable *DesiredReg = legalizeToReg(Desired);

3461 const bool Locked = true;	3538 constexpr bool Locked = true;

3462 _cmpxchg(Addr, T_eax, DesiredReg, Locked);	3539 _cmpxchg(Addr, T_eax, DesiredReg, Locked);

3463 _mov(DestPrev, T_eax);	3540 _mov(DestPrev, T_eax);

3464 }	3541 }

3465	3542

3466 template <class Machine>	3543 template <class Machine>

3467 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,	3544 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,

3468 Operand *PtrToMem,	3545 Operand *PtrToMem,

3469 Operand *Expected,	3546 Operand *Expected,

3470 Operand *Desired) {	3547 Operand *Desired) {

3471 if (Ctx->getFlags().getOptLevel() == Opt_m1)	3548 if (Ctx->getFlags().getOptLevel() == Opt_m1)

(...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3553 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {	3630 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {

3554 // All the fall-through paths must set this to true, but use this	3631 // All the fall-through paths must set this to true, but use this

3555 // for asserting.	3632 // for asserting.

3556 NeedsCmpxchg = true;	3633 NeedsCmpxchg = true;

3557 Op_Lo = &TargetX86Base<Machine>::_add;	3634 Op_Lo = &TargetX86Base<Machine>::_add;

3558 Op_Hi = &TargetX86Base<Machine>::_adc;	3635 Op_Hi = &TargetX86Base<Machine>::_adc;

3559 break;	3636 break;

3560 }	3637 }

3561 typename Traits::X86OperandMem *Addr =	3638 typename Traits::X86OperandMem *Addr =

3562 formMemoryOperand(Ptr, Dest->getType());	3639 formMemoryOperand(Ptr, Dest->getType());

3563 const bool Locked = true;	3640 constexpr bool Locked = true;

3564 Variable *T = nullptr;	3641 Variable *T = nullptr;

3565 _mov(T, Val);	3642 _mov(T, Val);

3566 _xadd(Addr, T, Locked);	3643 _xadd(Addr, T, Locked);

3567 _mov(Dest, T);	3644 _mov(Dest, T);

3568 return;	3645 return;

3569 }	3646 }

3570 case Intrinsics::AtomicSub: {	3647 case Intrinsics::AtomicSub: {

3571 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {	3648 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {

3572 NeedsCmpxchg = true;	3649 NeedsCmpxchg = true;

3573 Op_Lo = &TargetX86Base<Machine>::_sub;	3650 Op_Lo = &TargetX86Base<Machine>::_sub;

3574 Op_Hi = &TargetX86Base<Machine>::_sbb;	3651 Op_Hi = &TargetX86Base<Machine>::_sbb;

3575 break;	3652 break;

3576 }	3653 }

3577 typename Traits::X86OperandMem *Addr =	3654 typename Traits::X86OperandMem *Addr =

3578 formMemoryOperand(Ptr, Dest->getType());	3655 formMemoryOperand(Ptr, Dest->getType());

3579 const bool Locked = true;	3656 constexpr bool Locked = true;

3580 Variable *T = nullptr;	3657 Variable *T = nullptr;

3581 _mov(T, Val);	3658 _mov(T, Val);

3582 _neg(T);	3659 _neg(T);

3583 _xadd(Addr, T, Locked);	3660 _xadd(Addr, T, Locked);

3584 _mov(Dest, T);	3661 _mov(Dest, T);

3585 return;	3662 return;

3586 }	3663 }

3587 case Intrinsics::AtomicOr:	3664 case Intrinsics::AtomicOr:

3588 // TODO(jvoung): If Dest is null or dead, then some of these	3665 // TODO(jvoung): If Dest is null or dead, then some of these

3589 // operations do not need an "exchange", but just a locked op.	3666 // operations do not need an "exchange", but just a locked op.

(...skipping 87 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3677 _mov(T_ecx, T_edx);	3754 _mov(T_ecx, T_edx);

3678 (this->*Op_Hi)(T_ecx, hiOperand(Val));	3755 (this->*Op_Hi)(T_ecx, hiOperand(Val));

3679 } else {	3756 } else {

3680 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.	3757 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.

3681 // It just needs the Val loaded into ebx and ecx.	3758 // It just needs the Val loaded into ebx and ecx.

3682 // That can also be done before the loop.	3759 // That can also be done before the loop.

3683 _mov(T_ebx, loOperand(Val));	3760 _mov(T_ebx, loOperand(Val));

3684 _mov(T_ecx, hiOperand(Val));	3761 _mov(T_ecx, hiOperand(Val));

3685 Context.insert(Label);	3762 Context.insert(Label);

3686 }	3763 }

3687 const bool Locked = true;	3764 constexpr bool Locked = true;

3688 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3765 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3689 _br(Traits::Cond::Br_ne, Label);	3766 _br(Traits::Cond::Br_ne, Label);

3690 if (!IsXchg8b) {	3767 if (!IsXchg8b) {

3691 // If Val is a variable, model the extended live range of Val through	3768 // If Val is a variable, model the extended live range of Val through

3692 // the end of the loop, since it will be re-used by the loop.	3769 // the end of the loop, since it will be re-used by the loop.

3693 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {	3770 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

3694 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));	3771 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));

3695 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));	3772 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));

3696 Context.insert(InstFakeUse::create(Func, ValLo));	3773 Context.insert(InstFakeUse::create(Func, ValLo));

3697 Context.insert(InstFakeUse::create(Func, ValHi));	3774 Context.insert(InstFakeUse::create(Func, ValHi));

3698 }	3775 }

3699 } else {	3776 } else {

3700 // For xchg, the loop is slightly smaller and ebx/ecx are used.	3777 // For xchg, the loop is slightly smaller and ebx/ecx are used.

3701 Context.insert(InstFakeUse::create(Func, T_ebx));	3778 Context.insert(InstFakeUse::create(Func, T_ebx));

3702 Context.insert(InstFakeUse::create(Func, T_ecx));	3779 Context.insert(InstFakeUse::create(Func, T_ecx));

3703 }	3780 }

3704 // The address base (if any) is also reused in the loop.	3781 // The address base (if any) is also reused in the loop.

3705 if (Variable *Base = Addr->getBase())	3782 if (Variable *Base = Addr->getBase())

3706 Context.insert(InstFakeUse::create(Func, Base));	3783 Context.insert(InstFakeUse::create(Func, Base));

3707 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	3784 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

3708 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3785 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3709 _mov(DestLo, T_eax);	3786 _mov(DestLo, T_eax);

3710 _mov(DestHi, T_edx);	3787 _mov(DestHi, T_edx);

3711 return;	3788 return;

3712 }	3789 }

3713 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);	3790 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);

3714 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);	3791 int32_t Eax;

	3792 switch (Ty) {

	3793 default:

	3794 llvm_unreachable("Bad type for atomicRMW");

	3795 // fallthrough

	3796 case IceType_i32:

	3797 Eax = Traits::RegisterSet::Reg_eax;

	3798 break;

	3799 case IceType_i16:

	3800 Eax = Traits::RegisterSet::Reg_ax;

	3801 break;

	3802 case IceType_i8:

	3803 Eax = Traits::RegisterSet::Reg_al;

	3804 break;

	3805 }

	3806 Variable *T_eax = makeReg(Ty, Eax);

3715 _mov(T_eax, Addr);	3807 _mov(T_eax, Addr);

3716 typename Traits::Insts::Label *Label =	3808 typename Traits::Insts::Label *Label =

3717 Traits::Insts::Label::create(Func, this);	3809 Traits::Insts::Label::create(Func, this);

3718 Context.insert(Label);	3810 Context.insert(Label);

3719 // We want to pick a different register for T than Eax, so don't use	3811 // We want to pick a different register for T than Eax, so don't use

3720 // _mov(T == nullptr, T_eax).	3812 // _mov(T == nullptr, T_eax).

3721 Variable *T = makeReg(Ty);	3813 Variable *T = makeReg(Ty);

3722 _mov(T, T_eax);	3814 _mov(T, T_eax);

3723 (this->*Op_Lo)(T, Val);	3815 (this->*Op_Lo)(T, Val);

3724 const bool Locked = true;	3816 constexpr bool Locked = true;

3725 _cmpxchg(Addr, T_eax, T, Locked);	3817 _cmpxchg(Addr, T_eax, T, Locked);

3726 _br(Traits::Cond::Br_ne, Label);	3818 _br(Traits::Cond::Br_ne, Label);

3727 // If Val is a variable, model the extended live range of Val through	3819 // If Val is a variable, model the extended live range of Val through

3728 // the end of the loop, since it will be re-used by the loop.	3820 // the end of the loop, since it will be re-used by the loop.

3729 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {	3821 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

3730 Context.insert(InstFakeUse::create(Func, ValVar));	3822 Context.insert(InstFakeUse::create(Func, ValVar));

3731 }	3823 }

3732 // The address base (if any) is also reused in the loop.	3824 // The address base (if any) is also reused in the loop.

3733 if (Variable *Base = Addr->getBase())	3825 if (Variable *Base = Addr->getBase())

3734 Context.insert(InstFakeUse::create(Func, Base));	3826 Context.insert(InstFakeUse::create(Func, Base));

(...skipping 1476 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5211 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||	5303 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||

5212 Ty == IceType_v16i8);	5304 Ty == IceType_v16i8);

5213 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {	5305 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {

5214 Variable *Reg = makeVectorOfOnes(Ty, RegNum);	5306 Variable *Reg = makeVectorOfOnes(Ty, RegNum);

5215 SizeT Shift =	5307 SizeT Shift =

5216 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;	5308 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;

5217 _psll(Reg, Ctx->getConstantInt8(Shift));	5309 _psll(Reg, Ctx->getConstantInt8(Shift));

5218 return Reg;	5310 return Reg;

5219 } else {	5311 } else {

5220 // SSE has no left shift operation for vectors of 8 bit integers.	5312 // SSE has no left shift operation for vectors of 8 bit integers.

5221 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;	5313 constexpr uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

5222 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);	5314 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);

5223 Variable *Reg = makeReg(Ty, RegNum);	5315 Variable *Reg = makeReg(Ty, RegNum);

5224 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));	5316 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

5225 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));	5317 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));

5226 return Reg;	5318 return Reg;

5227 }	5319 }

5228 }	5320 }

5229	5321

5230 /// Construct a mask in a register that can be and'ed with a floating-point	5322 /// Construct a mask in a register that can be and'ed with a floating-point

5231 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32	5323 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32

(...skipping 13 matching lines...) Expand all Loading...
5245 typename TargetX86Base<Machine>::Traits::X86OperandMem *	5337 typename TargetX86Base<Machine>::Traits::X86OperandMem *

5246 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,	5338 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,

5247 uint32_t Offset) {	5339 uint32_t Offset) {

5248 // Ensure that Loc is a stack slot.	5340 // Ensure that Loc is a stack slot.

5249 assert(Slot->mustNotHaveReg());	5341 assert(Slot->mustNotHaveReg());

5250 assert(Slot->getRegNum() == Variable::NoRegister);	5342 assert(Slot->getRegNum() == Variable::NoRegister);

5251 // Compute the location of Loc in memory.	5343 // Compute the location of Loc in memory.

5252 // TODO(wala,stichnot): lea should not	5344 // TODO(wala,stichnot): lea should not

5253 // be required. The address of the stack slot is known at compile time	5345 // be required. The address of the stack slot is known at compile time

5254 // (although not until after addProlog()).	5346 // (although not until after addProlog()).

5255 const Type PointerType = IceType_i32;	5347 constexpr Type PointerType = IceType_i32;

5256 Variable *Loc = makeReg(PointerType);	5348 Variable *Loc = makeReg(PointerType);

5257 _lea(Loc, Slot);	5349 _lea(Loc, Slot);

5258 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);	5350 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);

5259 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);	5351 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);

5260 }	5352 }

5261	5353

5262 /// Helper for legalize() to emit the right code to lower an operand to a	5354 /// Helper for legalize() to emit the right code to lower an operand to a

5263 /// register of the appropriate type.	5355 /// register of the appropriate type.

5264 template <class Machine>	5356 template <class Machine>

5265 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {	5357 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5298 if (Subst->mustHaveReg() && !Subst->hasReg()) {	5390 if (Subst->mustHaveReg() && !Subst->hasReg()) {

5299 // At this point we know the substitution will have a register.	5391 // At this point we know the substitution will have a register.

5300 if (From->getType() == Subst->getType()) {	5392 if (From->getType() == Subst->getType()) {

5301 // At this point we know the substitution's register is compatible.	5393 // At this point we know the substitution's register is compatible.

5302 return Subst;	5394 return Subst;

5303 }	5395 }

5304 }	5396 }

5305 }	5397 }

5306 }	5398 }

5307	5399

5308 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {	5400 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {

5309 // Before doing anything with a Mem operand, we need to ensure that the	5401 // Before doing anything with a Mem operand, we need to ensure that the

5310 // Base and Index components are in physical registers.	5402 // Base and Index components are in physical registers.

5311 Variable *Base = Mem->getBase();	5403 Variable *Base = Mem->getBase();

5312 Variable *Index = Mem->getIndex();	5404 Variable *Index = Mem->getIndex();

5313 Variable *RegBase = nullptr;	5405 Variable *RegBase = nullptr;

5314 Variable *RegIndex = nullptr;	5406 Variable *RegIndex = nullptr;

5315 if (Base) {	5407 if (Base) {

5316 RegBase = legalizeToReg(Base);	5408 RegBase = legalizeToReg(Base);

5317 }	5409 }

5318 if (Index) {	5410 if (Index) {

(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5376 // Immediate specifically not allowed	5468 // Immediate specifically not allowed

5377 NeedsReg = true;	5469 NeedsReg = true;

5378 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))	5470 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))

5379 // On x86, FP constants are lowered to mem operands.	5471 // On x86, FP constants are lowered to mem operands.

5380 NeedsReg = true;	5472 NeedsReg = true;

5381 if (NeedsReg) {	5473 if (NeedsReg) {

5382 From = copyToReg(From, RegNum);	5474 From = copyToReg(From, RegNum);

5383 }	5475 }

5384 return From;	5476 return From;

5385 }	5477 }

5386 if (auto Var = llvm::dyn_cast<Variable>(From)) {	5478 if (auto *Var = llvm::dyn_cast<Variable>(From)) {

5387 // Check if the variable is guaranteed a physical register. This can happen	5479 // Check if the variable is guaranteed a physical register. This can happen

5388 // either when the variable is pre-colored or when it is assigned infinite	5480 // either when the variable is pre-colored or when it is assigned infinite

5389 // weight.	5481 // weight.

5390 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());	5482 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());

5391 // We need a new physical register for the operand if:	5483 // We need a new physical register for the operand if:

5392 // Mem is not allowed and Var isn't guaranteed a physical	5484 // Mem is not allowed and Var isn't guaranteed a physical

5393 // register, or	5485 // register, or

5394 // RegNum is required and Var->getRegNum() doesn't match.	5486 // RegNum is required and Var->getRegNum() doesn't match.

5395 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||	5487 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||

5396 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {	5488 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {

(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5631 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);	5723 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);

5632 Immediate->setShouldBePooled(true);	5724 Immediate->setShouldBePooled(true);

5633 // if we have already assigned a phy register, we must come from	5725 // if we have already assigned a phy register, we must come from

5634 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the	5726 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the

5635 // assigned register as this assignment is that start of its use-def	5727 // assigned register as this assignment is that start of its use-def

5636 // chain. So we add RegNum argument here.	5728 // chain. So we add RegNum argument here.

5637 Variable *Reg = makeReg(Immediate->getType(), RegNum);	5729 Variable *Reg = makeReg(Immediate->getType(), RegNum);

5638 IceString Label;	5730 IceString Label;

5639 llvm::raw_string_ostream Label_stream(Label);	5731 llvm::raw_string_ostream Label_stream(Label);

5640 Immediate->emitPoolLabel(Label_stream, Ctx);	5732 Immediate->emitPoolLabel(Label_stream, Ctx);

5641 const RelocOffsetT Offset = 0;	5733 constexpr RelocOffsetT Offset = 0;

5642 const bool SuppressMangling = true;	5734 constexpr bool SuppressMangling = true;

5643 Constant *Symbol =	5735 Constant *Symbol =

5644 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);	5736 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);

5645 typename Traits::X86OperandMem *MemOperand =	5737 typename Traits::X86OperandMem *MemOperand =

5646 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,	5738 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,

5647 Symbol);	5739 Symbol);

5648 _mov(Reg, MemOperand);	5740 _mov(Reg, MemOperand);

5649 return Reg;	5741 return Reg;

5650 }	5742 }

5651 assert("Unsupported -randomize-pool-immediates option" && false);	5743 assert("Unsupported -randomize-pool-immediates option" && false);

5652 }	5744 }

(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5728 // phi lowering, we should not ask for new physical registers in	5820 // phi lowering, we should not ask for new physical registers in

5729 // general. However, if we do meet Memory Operand during phi lowering,	5821 // general. However, if we do meet Memory Operand during phi lowering,

5730 // we should not blind or pool the immediates for now.	5822 // we should not blind or pool the immediates for now.

5731 if (RegNum != Variable::NoRegister)	5823 if (RegNum != Variable::NoRegister)

5732 return MemOperand;	5824 return MemOperand;

5733 Variable *RegTemp = makeReg(IceType_i32);	5825 Variable *RegTemp = makeReg(IceType_i32);

5734 IceString Label;	5826 IceString Label;

5735 llvm::raw_string_ostream Label_stream(Label);	5827 llvm::raw_string_ostream Label_stream(Label);

5736 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);	5828 MemOperand->getOffset()->emitPoolLabel(Label_stream, Ctx);

5737 MemOperand->getOffset()->setShouldBePooled(true);	5829 MemOperand->getOffset()->setShouldBePooled(true);

5738 const RelocOffsetT SymOffset = 0;	5830 constexpr RelocOffsetT SymOffset = 0;

5739 bool SuppressMangling = true;	5831 constexpr bool SuppressMangling = true;

5740 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),	5832 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),

5741 SuppressMangling);	5833 SuppressMangling);

5742 typename Traits::X86OperandMem *SymbolOperand =	5834 typename Traits::X86OperandMem *SymbolOperand =

5743 Traits::X86OperandMem::create(	5835 Traits::X86OperandMem::create(

5744 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);	5836 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);

5745 _mov(RegTemp, SymbolOperand);	5837 _mov(RegTemp, SymbolOperand);

5746 // If we have a base variable here, we should add the lea instruction	5838 // If we have a base variable here, we should add the lea instruction

5747 // to add the value of the base variable to RegTemp. If there is no	5839 // to add the value of the base variable to RegTemp. If there is no

5748 // base variable, we won't need this lea instruction.	5840 // base variable, we won't need this lea instruction.

5749 if (MemOperand->getBase()) {	5841 if (MemOperand->getBase()) {

(...skipping 15 matching lines...) Expand all Loading...
5765 }	5857 }

5766 // the offset is not eligible for blinding or pooling, return the original	5858 // the offset is not eligible for blinding or pooling, return the original

5767 // mem operand	5859 // mem operand

5768 return MemOperand;	5860 return MemOperand;

5769 }	5861 }

5770	5862

5771 } // end of namespace X86Internal	5863 } // end of namespace X86Internal

5772 } // end of namespace Ice	5864 } // end of namespace Ice

5773	5865

5774 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	5866 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8664Traits.h ('k') | src/IceTimerTree.cpp » ('j') | no next file with comments »