Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1427973003: Subzero: Refactor x86 register representation to actively use aliases. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Reformat | Created 5 years, 1 month ago
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1198 matching lines...)
1209 } 1209 }
1210 } else { 1210 } else {
1211 // NON-CONSTANT CASES. 1211 // NON-CONSTANT CASES.
1212 Constant *BitTest = Ctx->getConstantInt32(0x20); 1212 Constant *BitTest = Ctx->getConstantInt32(0x20);
1213 typename Traits::Insts::Label *Label = 1213 typename Traits::Insts::Label *Label =
1214 Traits::Insts::Label::create(Func, this); 1214 Traits::Insts::Label::create(Func, this);
1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1216 // t1:ecx = c.lo & 0xff 1216 // t1:ecx = c.lo & 0xff
1217 // t2 = b.lo 1217 // t2 = b.lo
1218 // t3 = b.hi 1218 // t3 = b.hi
1219 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1219 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
1220 _mov(T_1, Src1Lo);
1221 _mov(T_2, Src0Lo); 1220 _mov(T_2, Src0Lo);
1222 _mov(T_3, Src0Hi); 1221 _mov(T_3, Src0Hi);
1223 switch (Op) { 1222 switch (Op) {
1224 default: 1223 default:
1225 assert(0 && "non-shift op"); 1224 assert(0 && "non-shift op");
1226 break; 1225 break;
1227 case InstArithmetic::Shl: { 1226 case InstArithmetic::Shl: {
1228 // a=b<<c ==> 1227 // a=b<<c ==>
1229 // t3 = shld t3, t2, t1 1228 // t3 = shld t3, t2, t1
1230 // t2 = shl t2, t1 1229 // t2 = shl t2, t1
(...skipping 57 matching lines...)
1288 // a.hi = t3 1287 // a.hi = t3
1289 Context.insert(Label); 1288 Context.insert(Label);
1290 _mov(DestLo, T_2); 1289 _mov(DestLo, T_2);
1291 _mov(DestHi, T_3); 1290 _mov(DestHi, T_3);
1292 } 1291 }
1293 } 1292 }
1294 1293
1295 template <class Machine> 1294 template <class Machine>
1296 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1295 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1297 Variable *Dest = Inst->getDest(); 1296 Variable *Dest = Inst->getDest();
1297 Type Ty = Dest->getType();
1298 Operand *Src0 = legalize(Inst->getSrc(0)); 1298 Operand *Src0 = legalize(Inst->getSrc(0));
1299 Operand *Src1 = legalize(Inst->getSrc(1)); 1299 Operand *Src1 = legalize(Inst->getSrc(1));
1300 if (Inst->isCommutative()) { 1300 if (Inst->isCommutative()) {
1301 uint32_t SwapCount = 0; 1301 uint32_t SwapCount = 0;
1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { 1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
1303 std::swap(Src0, Src1); 1303 std::swap(Src0, Src1);
1304 ++SwapCount; 1304 ++SwapCount;
1305 } 1305 }
1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) { 1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
1307 std::swap(Src0, Src1); 1307 std::swap(Src0, Src1);
1308 ++SwapCount; 1308 ++SwapCount;
1309 } 1309 }
1310 // Improve two-address code patterns by avoiding a copy to the dest 1310 // Improve two-address code patterns by avoiding a copy to the dest
1311 // register when one of the source operands ends its lifetime here. 1311 // register when one of the source operands ends its lifetime here.
1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) { 1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) {
1313 std::swap(Src0, Src1); 1313 std::swap(Src0, Src1);
1314 ++SwapCount; 1314 ++SwapCount;
1315 } 1315 }
1316 assert(SwapCount <= 1); 1316 assert(SwapCount <= 1);
1317 (void)SwapCount; 1317 (void)SwapCount;
1318 } 1318 }
1319 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1319 if (!Traits::Is64Bit && Ty == IceType_i64) {
1320 // These x86-32 helper-call-involved instructions are lowered in this 1320 // These x86-32 helper-call-involved instructions are lowered in this
1321 // separate switch. This is because loOperand() and hiOperand() may insert 1321 // separate switch. This is because loOperand() and hiOperand() may insert
1322 // redundant instructions for constant blinding and pooling. Such redundant 1322 // redundant instructions for constant blinding and pooling. Such redundant
1323 // instructions will fail liveness analysis under -Om1 setting. And, 1323 // instructions will fail liveness analysis under -Om1 setting. And,
1324 // actually these arguments do not need to be processed with loOperand() 1324 // actually these arguments do not need to be processed with loOperand()
1325 // and hiOperand() to be used. 1325 // and hiOperand() to be used.
1326 switch (Inst->getOp()) { 1326 switch (Inst->getOp()) {
1327 case InstArithmetic::Udiv: { 1327 case InstArithmetic::Udiv: {
1328 constexpr SizeT MaxSrcs = 2; 1328 constexpr SizeT MaxSrcs = 2;
1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
(...skipping 126 matching lines...)
1456 case InstArithmetic::Udiv: 1456 case InstArithmetic::Udiv:
1457 case InstArithmetic::Sdiv: 1457 case InstArithmetic::Sdiv:
1458 case InstArithmetic::Urem: 1458 case InstArithmetic::Urem:
1459 case InstArithmetic::Srem: 1459 case InstArithmetic::Srem:
1460 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1460 llvm_unreachable("Call-helper-involved instruction for i64 type \
1461 should have already been handled before"); 1461 should have already been handled before");
1462 break; 1462 break;
1463 } 1463 }
1464 return; 1464 return;
1465 } 1465 }
1466 if (isVectorType(Dest->getType())) { 1466 if (isVectorType(Ty)) {
1467 // TODO: Trap on integer divide and integer modulo by zero. See: 1467 // TODO: Trap on integer divide and integer modulo by zero. See:
1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899 1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899
1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) 1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
1470 Src1 = legalizeToReg(Src1); 1470 Src1 = legalizeToReg(Src1);
1471 switch (Inst->getOp()) { 1471 switch (Inst->getOp()) {
1472 case InstArithmetic::_num: 1472 case InstArithmetic::_num:
1473 llvm_unreachable("Unknown arithmetic operator"); 1473 llvm_unreachable("Unknown arithmetic operator");
1474 break; 1474 break;
1475 case InstArithmetic::Add: { 1475 case InstArithmetic::Add: {
1476 Variable *T = makeReg(Dest->getType()); 1476 Variable *T = makeReg(Ty);
1477 _movp(T, Src0); 1477 _movp(T, Src0);
1478 _padd(T, Src1); 1478 _padd(T, Src1);
1479 _movp(Dest, T); 1479 _movp(Dest, T);
1480 } break; 1480 } break;
1481 case InstArithmetic::And: { 1481 case InstArithmetic::And: {
1482 Variable *T = makeReg(Dest->getType()); 1482 Variable *T = makeReg(Ty);
1483 _movp(T, Src0); 1483 _movp(T, Src0);
1484 _pand(T, Src1); 1484 _pand(T, Src1);
1485 _movp(Dest, T); 1485 _movp(Dest, T);
1486 } break; 1486 } break;
1487 case InstArithmetic::Or: { 1487 case InstArithmetic::Or: {
1488 Variable *T = makeReg(Dest->getType()); 1488 Variable *T = makeReg(Ty);
1489 _movp(T, Src0); 1489 _movp(T, Src0);
1490 _por(T, Src1); 1490 _por(T, Src1);
1491 _movp(Dest, T); 1491 _movp(Dest, T);
1492 } break; 1492 } break;
1493 case InstArithmetic::Xor: { 1493 case InstArithmetic::Xor: {
1494 Variable *T = makeReg(Dest->getType()); 1494 Variable *T = makeReg(Ty);
1495 _movp(T, Src0); 1495 _movp(T, Src0);
1496 _pxor(T, Src1); 1496 _pxor(T, Src1);
1497 _movp(Dest, T); 1497 _movp(Dest, T);
1498 } break; 1498 } break;
1499 case InstArithmetic::Sub: { 1499 case InstArithmetic::Sub: {
1500 Variable *T = makeReg(Dest->getType()); 1500 Variable *T = makeReg(Ty);
1501 _movp(T, Src0); 1501 _movp(T, Src0);
1502 _psub(T, Src1); 1502 _psub(T, Src1);
1503 _movp(Dest, T); 1503 _movp(Dest, T);
1504 } break; 1504 } break;
1505 case InstArithmetic::Mul: { 1505 case InstArithmetic::Mul: {
1506 bool TypesAreValidForPmull = 1506 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
1507 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1508 bool InstructionSetIsValidForPmull = 1507 bool InstructionSetIsValidForPmull =
1509 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; 1508 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
1510 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1509 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1511 Variable *T = makeReg(Dest->getType()); 1510 Variable *T = makeReg(Ty);
1512 _movp(T, Src0); 1511 _movp(T, Src0);
1513 _pmull(T, Src0 == Src1 ? T : Src1); 1512 _pmull(T, Src0 == Src1 ? T : Src1);
1514 _movp(Dest, T); 1513 _movp(Dest, T);
1515 } else if (Dest->getType() == IceType_v4i32) { 1514 } else if (Ty == IceType_v4i32) {
1516 // Lowering sequence: 1515 // Lowering sequence:
1517 // Note: The mask arguments have index 0 on the left. 1516 // Note: The mask arguments have index 0 on the left.
1518 // 1517 //
1519 // movups T1, Src0 1518 // movups T1, Src0
1520 // pshufd T2, Src0, {1,0,3,0} 1519 // pshufd T2, Src0, {1,0,3,0}
1521 // pshufd T3, Src1, {1,0,3,0} 1520 // pshufd T3, Src1, {1,0,3,0}
1522 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1521 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1523 // pmuludq T1, Src1 1522 // pmuludq T1, Src1
1524 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1525 // pmuludq T2, T3 1524 // pmuludq T2, T3
(...skipping 17 matching lines...)
1543 Variable *T3 = makeReg(IceType_v4i32); 1542 Variable *T3 = makeReg(IceType_v4i32);
1544 Variable *T4 = makeReg(IceType_v4i32); 1543 Variable *T4 = makeReg(IceType_v4i32);
1545 _movp(T1, Src0); 1544 _movp(T1, Src0);
1546 _pshufd(T2, Src0, Mask1030); 1545 _pshufd(T2, Src0, Mask1030);
1547 _pshufd(T3, Src1, Mask1030); 1546 _pshufd(T3, Src1, Mask1030);
1548 _pmuludq(T1, Src1); 1547 _pmuludq(T1, Src1);
1549 _pmuludq(T2, T3); 1548 _pmuludq(T2, T3);
1550 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1551 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); 1550 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1552 _movp(Dest, T4); 1551 _movp(Dest, T4);
1553 } else if (Dest->getType() == IceType_v16i8) { 1552 } else if (Ty == IceType_v16i8) {
1554 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1553 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1555 } else { 1554 } else {
1556 llvm::report_fatal_error("Invalid vector multiply type"); 1555 llvm::report_fatal_error("Invalid vector multiply type");
1557 } 1556 }
1558 } break; 1557 } break;
1559 case InstArithmetic::Shl: 1558 case InstArithmetic::Shl:
1560 case InstArithmetic::Lshr: 1559 case InstArithmetic::Lshr:
1561 case InstArithmetic::Ashr: 1560 case InstArithmetic::Ashr:
1562 case InstArithmetic::Udiv: 1561 case InstArithmetic::Udiv:
1563 case InstArithmetic::Urem: 1562 case InstArithmetic::Urem:
1564 case InstArithmetic::Sdiv: 1563 case InstArithmetic::Sdiv:
1565 case InstArithmetic::Srem: 1564 case InstArithmetic::Srem:
1566 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1565 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1567 break; 1566 break;
1568 case InstArithmetic::Fadd: { 1567 case InstArithmetic::Fadd: {
1569 Variable *T = makeReg(Dest->getType()); 1568 Variable *T = makeReg(Ty);
1570 _movp(T, Src0); 1569 _movp(T, Src0);
1571 _addps(T, Src1); 1570 _addps(T, Src1);
1572 _movp(Dest, T); 1571 _movp(Dest, T);
1573 } break; 1572 } break;
1574 case InstArithmetic::Fsub: { 1573 case InstArithmetic::Fsub: {
1575 Variable *T = makeReg(Dest->getType()); 1574 Variable *T = makeReg(Ty);
1576 _movp(T, Src0); 1575 _movp(T, Src0);
1577 _subps(T, Src1); 1576 _subps(T, Src1);
1578 _movp(Dest, T); 1577 _movp(Dest, T);
1579 } break; 1578 } break;
1580 case InstArithmetic::Fmul: { 1579 case InstArithmetic::Fmul: {
1581 Variable *T = makeReg(Dest->getType()); 1580 Variable *T = makeReg(Ty);
1582 _movp(T, Src0); 1581 _movp(T, Src0);
1583 _mulps(T, Src0 == Src1 ? T : Src1); 1582 _mulps(T, Src0 == Src1 ? T : Src1);
1584 _movp(Dest, T); 1583 _movp(Dest, T);
1585 } break; 1584 } break;
1586 case InstArithmetic::Fdiv: { 1585 case InstArithmetic::Fdiv: {
1587 Variable *T = makeReg(Dest->getType()); 1586 Variable *T = makeReg(Ty);
1588 _movp(T, Src0); 1587 _movp(T, Src0);
1589 _divps(T, Src1); 1588 _divps(T, Src1);
1590 _movp(Dest, T); 1589 _movp(Dest, T);
1591 } break; 1590 } break;
1592 case InstArithmetic::Frem: 1591 case InstArithmetic::Frem:
1593 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1592 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1594 break; 1593 break;
1595 } 1594 }
1596 return; 1595 return;
1597 } 1596 }
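
As a side note, here is a scalar C++ sketch of why the pmuludq/pshufd/shufps sequence in the IceType_v4i32 multiply case above yields the element-wise low 32 bits of each product; the array-based emulation and its names are illustrative only, not part of this CL.

#include <array>
#include <cstdint>

// pmuludq multiplies the even lanes as 64-bit products, so the odd lanes are
// first moved into even positions with pshufd, and shufps/pshufd reassemble
// the low halves in the original order (mask index 0 is the leftmost element).
static std::array<uint32_t, 4> mulV4i32(const std::array<uint32_t, 4> &A,
                                        const std::array<uint32_t, 4> &B) {
  // pmuludq T1, Src1                 -> {A[0]*B[0], A[2]*B[2]} as 64-bit lanes
  uint64_t Even0 = uint64_t(A[0]) * B[0];
  uint64_t Even2 = uint64_t(A[2]) * B[2];
  // pshufd {1,0,3,0} on both sources, then pmuludq T2, T3
  //                                  -> {A[1]*B[1], A[3]*B[3]} as 64-bit lanes
  uint64_t Odd1 = uint64_t(A[1]) * B[1];
  uint64_t Odd3 = uint64_t(A[3]) * B[3];
  // shufps T1, T2, {0,2,0,2} then pshufd T4, T1, {0,2,1,3}: keep the low 32
  // bits of each product and restore the original lane order.
  return {uint32_t(Even0), uint32_t(Odd1), uint32_t(Even2), uint32_t(Odd3)};
}
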
(...skipping 28 matching lines...)
1626 _sub(T, Src1); 1625 _sub(T, Src1);
1627 _mov(Dest, T); 1626 _mov(Dest, T);
1628 break; 1627 break;
1629 case InstArithmetic::Mul: 1628 case InstArithmetic::Mul:
1630 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1631 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1630 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1632 return; 1631 return;
1633 } 1632 }
1634 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1635 // be in al. 1634 // be in al.
1636 if (isByteSizedArithType(Dest->getType())) { 1635 if (isByteSizedArithType(Ty)) {
1637 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1636 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1638 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1639 _imul(T, Src0 == Src1 ? T : Src1); 1638 _imul(T, Src0 == Src1 ? T : Src1);
1640 _mov(Dest, T); 1639 _mov(Dest, T);
1641 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1642 T = makeReg(Dest->getType()); 1641 T = makeReg(Ty);
1643 _imul_imm(T, Src0, ImmConst); 1642 _imul_imm(T, Src0, ImmConst);
1644 _mov(Dest, T); 1643 _mov(Dest, T);
1645 } else { 1644 } else {
1646 _mov(T, Src0); 1645 _mov(T, Src0);
1647 _imul(T, Src0 == Src1 ? T : Src1); 1646 _imul(T, Src0 == Src1 ? T : Src1);
1648 _mov(Dest, T); 1647 _mov(Dest, T);
1649 } 1648 }
1650 break; 1649 break;
1651 case InstArithmetic::Shl: 1650 case InstArithmetic::Shl:
1652 _mov(T, Src0); 1651 _mov(T, Src0);
1653 if (!llvm::isa<ConstantInteger32>(Src1)) { 1652 if (!llvm::isa<ConstantInteger32>(Src1))
1654 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1653 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1655 _mov(Cl, Src1);
1656 Src1 = Cl;
1657 }
1658 _shl(T, Src1); 1654 _shl(T, Src1);
1659 _mov(Dest, T); 1655 _mov(Dest, T);
1660 break; 1656 break;
1661 case InstArithmetic::Lshr: 1657 case InstArithmetic::Lshr:
1662 _mov(T, Src0); 1658 _mov(T, Src0);
1663 if (!llvm::isa<ConstantInteger32>(Src1)) { 1659 if (!llvm::isa<ConstantInteger32>(Src1))
1664 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1660 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1665 _mov(Cl, Src1);
1666 Src1 = Cl;
1667 }
1668 _shr(T, Src1); 1661 _shr(T, Src1);
1669 _mov(Dest, T); 1662 _mov(Dest, T);
1670 break; 1663 break;
1671 case InstArithmetic::Ashr: 1664 case InstArithmetic::Ashr:
1672 _mov(T, Src0); 1665 _mov(T, Src0);
1673 if (!llvm::isa<ConstantInteger32>(Src1)) { 1666 if (!llvm::isa<ConstantInteger32>(Src1))
1674 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1667 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1675 _mov(Cl, Src1);
1676 Src1 = Cl;
1677 }
1678 _sar(T, Src1); 1668 _sar(T, Src1);
1679 _mov(Dest, T); 1669 _mov(Dest, T);
1680 break; 1670 break;
1681 case InstArithmetic::Udiv: 1671 case InstArithmetic::Udiv: {
1682 // div and idiv are the few arithmetic operators that do not allow 1672 // div and idiv are the few arithmetic operators that do not allow
1683 // immediates as the operand. 1673 // immediates as the operand.
1684 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1685 if (isByteSizedArithType(Dest->getType())) { 1675 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1686 // For 8-bit unsigned division we need to zero-extend al into ah. A mov 1676 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1687 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 1677 switch (Ty) {
1688 // assembler refuses to encode %ah (encoding %spl with a REX prefix 1678 default:
1689 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah 1679 llvm_unreachable("Bad type for udiv");
1690 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and 1680 // fallthrough
1691 // d[%lh], which means the X86 target lowering (and the register 1681 case IceType_i32:
1692 // allocator) would have to be aware of this restriction. For now, we 1682 break;
1693 // simply zero %eax completely, and move the dividend into %al. 1683 case IceType_i16:
1694 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1684 Eax = Traits::RegisterSet::Reg_ax;
1695 Context.insert(InstFakeDef::create(Func, T_eax)); 1685 Edx = Traits::RegisterSet::Reg_dx;
1696 _xor(T_eax, T_eax); 1686 break;
1697 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1687 case IceType_i8:
1698 _div(T, Src1, T); 1688 Eax = Traits::RegisterSet::Reg_al;
1699 _mov(Dest, T); 1689 Edx = Traits::RegisterSet::Reg_ah;
1700 Context.insert(InstFakeUse::create(Func, T_eax)); 1690 break;
1701 } else {
1702 Type Ty = Dest->getType();
1703 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1704 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1705 switch (Ty) {
1706 default:
1707 llvm_unreachable("Bad type for udiv");
1708 // fallthrough
1709 case IceType_i32:
1710 break;
1711 case IceType_i16:
1712 Eax = Traits::RegisterSet::Reg_ax;
1713 Edx = Traits::RegisterSet::Reg_dx;
1714 break;
1715 }
1716 Constant *Zero = Ctx->getConstantZero(Ty);
1717 _mov(T, Src0, Eax);
1718 _mov(T_edx, Zero, Edx);
1719 _div(T, Src1, T_edx);
1720 _mov(Dest, T);
1721 } 1691 }
1722 break; 1692 _mov(T, Src0, Eax);
1693 _mov(T_edx, Ctx->getConstantZero(Ty), Edx);
1694 _div(T, Src1, T_edx);
1695 _mov(Dest, T);
1696 } break;
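
For context, x86 div divides the register pair edx:eax (dx:ax, or ah:al for the 8-bit form) by its operand, which is why the lowering zeroes the high half first; a small C++ illustration of the 32-bit case, not part of this CL.

#include <cstdint>

// T_edx = 0, T = Src0 in eax, then "div Src1" leaves the quotient in eax
// (the Dest) and the remainder in edx. Src1 == 0 faults, as the hardware does.
static void udiv32(uint32_t Src0, uint32_t Src1, uint32_t &Quotient,
                   uint32_t &Remainder) {
  uint32_t Edx = 0;                                 // _mov(T_edx, Zero, Edx)
  uint64_t Dividend = (uint64_t(Edx) << 32) | Src0; // edx:eax
  Quotient = uint32_t(Dividend / Src1);             // -> eax
  Remainder = uint32_t(Dividend % Src1);            // -> edx
}
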
1723 case InstArithmetic::Sdiv: 1697 case InstArithmetic::Sdiv:
1724 // TODO(stichnot): Enable this after doing better performance and cross 1698 // TODO(stichnot): Enable this after doing better performance and cross
1725 // testing. 1699 // testing.
1726 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1700 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1727 // Optimize division by constant power of 2, but not for Om1 or O0, just 1701 // Optimize division by constant power of 2, but not for Om1 or O0, just
1728 // to keep things simple there. 1702 // to keep things simple there.
1729 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1703 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1730 int32_t Divisor = C->getValue(); 1704 int32_t Divisor = C->getValue();
1731 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1705 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1732 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1706 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1733 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1707 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1734 Type Ty = Dest->getType();
1735 // LLVM does the following for dest=src/(1<<log): 1708 // LLVM does the following for dest=src/(1<<log):
1736 // t=src 1709 // t=src
1737 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1710 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1738 // shr t,typewidth-log 1711 // shr t,typewidth-log
1739 // add t,src 1712 // add t,src
1740 // sar t,log 1713 // sar t,log
1741 // dest=t 1714 // dest=t
1742 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 1715 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
1743 _mov(T, Src0); 1716 _mov(T, Src0);
1744 // If for some reason we are dividing by 1, just treat it like an 1717 // If for some reason we are dividing by 1, just treat it like an
1745 // assignment. 1718 // assignment.
1746 if (LogDiv > 0) { 1719 if (LogDiv > 0) {
1747 // The initial sar is unnecessary when dividing by 2. 1720 // The initial sar is unnecessary when dividing by 2.
1748 if (LogDiv > 1) 1721 if (LogDiv > 1)
1749 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 1722 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1750 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1723 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1751 _add(T, Src0); 1724 _add(T, Src0);
1752 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1725 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1753 } 1726 }
1754 _mov(Dest, T); 1727 _mov(Dest, T);
1755 return; 1728 return;
1756 } 1729 }
1757 } 1730 }
1758 } 1731 }
1759 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1732 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1760 switch (Type Ty = Dest->getType()) { 1733 switch (Ty) {
1761 default: 1734 default:
1762 llvm_unreachable("Bad type for sdiv"); 1735 llvm_unreachable("Bad type for sdiv");
1763 // fallthrough 1736 // fallthrough
1764 case IceType_i32: 1737 case IceType_i32:
1765 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); 1738 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1766 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1739 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1767 break; 1740 break;
1768 case IceType_i16: 1741 case IceType_i16:
1769 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1742 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1770 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1743 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1771 break; 1744 break;
1772 case IceType_i8: 1745 case IceType_i8:
1773 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1746 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1774 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1747 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1775 break; 1748 break;
1776 } 1749 }
1777 _cbwdq(T_edx, T); 1750 _cbwdq(T_edx, T);
1778 _idiv(T, Src1, T_edx); 1751 _idiv(T, Src1, T_edx);
1779 _mov(Dest, T); 1752 _mov(Dest, T);
1780 break; 1753 break;
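
For reference, the commented sar/shr/add/sar sequence computes a truncating (round-toward-zero) signed division by 1<<log; here is a C++ model for 32-bit operands with log in 1..31, illustrative only and not part of this CL.

#include <cstdint>

static int32_t sdivPow2(int32_t Src, uint32_t Log) {
  int32_t T = Src;
  if (Log > 1)
    T >>= 31;                             // sar t, 31: all-ones if Src < 0, else 0
  T = int32_t(uint32_t(T) >> (32 - Log)); // shr t, 32-log: bias of (1<<log)-1 for negatives
  T += Src;                               // add t, src
  return T >> Log;                        // sar t, log (arithmetic shift)
}
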
1781 case InstArithmetic::Urem: 1754 case InstArithmetic::Urem: {
1782 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1755 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1783 if (isByteSizedArithType(Dest->getType())) { 1756 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1784 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1757 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1785 Context.insert(InstFakeDef::create(Func, T_eax)); 1758 switch (Ty) {
1786 _xor(T_eax, T_eax); 1759 default:
1787 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1760 llvm_unreachable("Bad type for urem");
1788 _div(T, Src1, T); 1761 // fallthrough
1789 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1762 case IceType_i32:
1790 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1763 break;
1791 // this ever becomes a problem we can introduce a pseudo rem instruction 1764 case IceType_i16:
1792 // that returns the remainder in %al directly (and uses a mov for copying 1765 Eax = Traits::RegisterSet::Reg_ax;
1793 // %ah to %al.) 1766 Edx = Traits::RegisterSet::Reg_dx;
1794 static constexpr uint8_t AlSizeInBits = 8; 1767 break;
1795 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1768 case IceType_i8:
1796 _mov(Dest, T); 1769 Eax = Traits::RegisterSet::Reg_al;
1797 Context.insert(InstFakeUse::create(Func, T_eax)); 1770 Edx = Traits::RegisterSet::Reg_ah;
1798 } else { 1771 break;
1799 Type Ty = Dest->getType();
1800 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1801 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1802 switch (Ty) {
1803 default:
1804 llvm_unreachable("Bad type for urem");
1805 // fallthrough
1806 case IceType_i32:
1807 break;
1808 case IceType_i16:
1809 Eax = Traits::RegisterSet::Reg_ax;
1810 Edx = Traits::RegisterSet::Reg_dx;
1811 break;
1812 }
1813 Constant *Zero = Ctx->getConstantZero(Ty);
1814 T_edx = makeReg(Dest->getType(), Edx);
1815 _mov(T_edx, Zero);
1816 _mov(T, Src0, Eax);
1817 _div(T_edx, Src1, T);
1818 _mov(Dest, T_edx);
1819 } 1772 }
1820 break; 1773 T_edx = makeReg(Ty, Edx);
1821 case InstArithmetic::Srem: 1774 _mov(T_edx, Ctx->getConstantZero(Ty));
1775 _mov(T, Src0, Eax);
1776 _div(T_edx, Src1, T);
1777 _mov(Dest, T_edx);
1778 } break;
1779 case InstArithmetic::Srem: {
1822 // TODO(stichnot): Enable this after doing better performance and cross 1780 // TODO(stichnot): Enable this after doing better performance and cross
1823 // testing. 1781 // testing.
1824 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1782 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1825 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 1783 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1826 // keep things simple there. 1784 // keep things simple there.
1827 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1785 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1828 int32_t Divisor = C->getValue(); 1786 int32_t Divisor = C->getValue();
1829 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1787 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1830 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1788 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1831 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1789 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1832 Type Ty = Dest->getType();
1833 // LLVM does the following for dest=src%(1<<log): 1790 // LLVM does the following for dest=src%(1<<log):
1834 // t=src 1791 // t=src
1835 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1792 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1836 // shr t,typewidth-log 1793 // shr t,typewidth-log
1837 // add t,src 1794 // add t,src
1838 // and t, -(1<<log) 1795 // and t, -(1<<log)
1839 // sub t,src 1796 // sub t,src
1840 // neg t 1797 // neg t
1841 // dest=t 1798 // dest=t
1842 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 1799 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
(...skipping 10 matching lines...)
1853 _add(T, Src0); 1810 _add(T, Src0);
1854 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1811 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1855 _sub(T, Src0); 1812 _sub(T, Src0);
1856 _neg(T); 1813 _neg(T);
1857 _mov(Dest, T); 1814 _mov(Dest, T);
1858 return; 1815 return;
1859 } 1816 }
1860 } 1817 }
1861 } 1818 }
1862 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1819 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1863 switch (Type Ty = Dest->getType()) { 1820 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1821 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1822 switch (Ty) {
1864 default: 1823 default:
1865 llvm_unreachable("Bad type for srem"); 1824 llvm_unreachable("Bad type for srem");
1866 // fallthrough 1825 // fallthrough
1867 case IceType_i32: 1826 case IceType_i32:
1868 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1869 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1870 _cbwdq(T_edx, T);
1871 _idiv(T_edx, Src1, T);
1872 _mov(Dest, T_edx);
1873 break; 1827 break;
1874 case IceType_i16: 1828 case IceType_i16:
1875 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1829 Eax = Traits::RegisterSet::Reg_ax;
1876 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1830 Edx = Traits::RegisterSet::Reg_dx;
1877 _cbwdq(T_edx, T);
1878 _idiv(T_edx, Src1, T);
1879 _mov(Dest, T_edx);
1880 break; 1831 break;
1881 case IceType_i8: 1832 case IceType_i8:
1882 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1833 Eax = Traits::RegisterSet::Reg_al;
1883 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). 1834 Edx = Traits::RegisterSet::Reg_ah;
1884 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
1885 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1886 _cbwdq(T_edx, T);
1887 _idiv(T_edx, Src1, T);
1888 static constexpr uint8_t AlSizeInBits = 8;
1889 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
1890 _mov(Dest, T_edx);
1891 break; 1835 break;
1892 } 1836 }
1893 break; 1837 T_edx = makeReg(Ty, Edx);
1838 _mov(T, Src0, Eax);
1839 _cbwdq(T_edx, T);
1840 _idiv(T_edx, Src1, T);
1841 _mov(Dest, T_edx);
1842 } break;
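
Similarly, the commented Srem sequence recovers src - (src / (1<<log)) * (1<<log); a minimal C++ model for 32-bit operands with log in 1..31, illustrative only and not part of this CL.

#include <cstdint>

static int32_t sremPow2(int32_t Src, uint32_t Log) {
  int32_t T = Src >> 31;                  // sar t, 31
  T = int32_t(uint32_t(T) >> (32 - Log)); // shr t, 32-log
  T += Src;                               // add t, src
  T &= int32_t(0xFFFFFFFFu << Log);       // and t, -(1<<log): quotient * (1<<log)
  T -= Src;                               // sub t, src
  return -T;                              // neg t: the remainder, with the sign of src
}
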
1894 case InstArithmetic::Fadd: 1843 case InstArithmetic::Fadd:
1895 _mov(T, Src0); 1844 _mov(T, Src0);
1896 _addss(T, Src1); 1845 _addss(T, Src1);
1897 _mov(Dest, T); 1846 _mov(Dest, T);
1898 break; 1847 break;
1899 case InstArithmetic::Fsub: 1848 case InstArithmetic::Fsub:
1900 _mov(T, Src0); 1849 _mov(T, Src0);
1901 _subss(T, Src1); 1850 _subss(T, Src1);
1902 _mov(Dest, T); 1851 _mov(Dest, T);
1903 break; 1852 break;
1904 case InstArithmetic::Fmul: 1853 case InstArithmetic::Fmul:
1905 _mov(T, Src0); 1854 _mov(T, Src0);
1906 _mulss(T, Src0 == Src1 ? T : Src1); 1855 _mulss(T, Src0 == Src1 ? T : Src1);
1907 _mov(Dest, T); 1856 _mov(Dest, T);
1908 break; 1857 break;
1909 case InstArithmetic::Fdiv: 1858 case InstArithmetic::Fdiv:
1910 _mov(T, Src0); 1859 _mov(T, Src0);
1911 _divss(T, Src1); 1860 _divss(T, Src1);
1912 _mov(Dest, T); 1861 _mov(Dest, T);
1913 break; 1862 break;
1914 case InstArithmetic::Frem: { 1863 case InstArithmetic::Frem: {
1915 constexpr SizeT MaxSrcs = 2; 1864 constexpr SizeT MaxSrcs = 2;
1916 Type Ty = Dest->getType();
1917 InstCall *Call = makeHelperCall( 1865 InstCall *Call = makeHelperCall(
1918 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 1866 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1919 Call->addArg(Src0); 1867 Call->addArg(Src0);
1920 Call->addArg(Src1); 1868 Call->addArg(Src1);
1921 return lowerCall(Call); 1869 return lowerCall(Call);
1922 } 1870 }
1923 } 1871 }
1924 } 1872 }
1925 1873
1926 template <class Machine> 1874 template <class Machine>
(...skipping 57 matching lines...)
1984 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1932 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1985 _cmp(Src0, Zero); 1933 _cmp(Src0, Zero);
1986 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1934 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1987 } 1935 }
1988 1936
1989 template <class Machine> 1937 template <class Machine>
1990 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1938 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
1991 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1939 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1992 InstCast::OpKind CastKind = Inst->getCastKind(); 1940 InstCast::OpKind CastKind = Inst->getCastKind();
1993 Variable *Dest = Inst->getDest(); 1941 Variable *Dest = Inst->getDest();
1942 Type DestTy = Dest->getType();
1994 switch (CastKind) { 1943 switch (CastKind) {
1995 default: 1944 default:
1996 Func->setError("Cast type not supported"); 1945 Func->setError("Cast type not supported");
1997 return; 1946 return;
1998 case InstCast::Sext: { 1947 case InstCast::Sext: {
1999 // Src0RM is the source operand legalized to physical register or memory, 1948 // Src0RM is the source operand legalized to physical register or memory,
2000 // but not immediate, since the relevant x86 native instructions don't 1949 // but not immediate, since the relevant x86 native instructions don't
2001 // allow an immediate operand. If the operand is an immediate, we could 1950 // allow an immediate operand. If the operand is an immediate, we could
2002 // consider computing the strength-reduced result at translation time, but 1951 // consider computing the strength-reduced result at translation time, but
2003 // we're unlikely to see something like that in the bitcode that the 1952 // we're unlikely to see something like that in the bitcode that the
2004 // optimizer wouldn't have already taken care of. 1953 // optimizer wouldn't have already taken care of.
2005 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1954 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2006 if (isVectorType(Dest->getType())) { 1955 if (isVectorType(DestTy)) {
2007 Type DestTy = Dest->getType();
2008 if (DestTy == IceType_v16i8) { 1956 if (DestTy == IceType_v16i8) {
2009 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 1957 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2010 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 1958 Variable *OneMask = makeVectorOfOnes(DestTy);
2011 Variable *T = makeReg(DestTy); 1959 Variable *T = makeReg(DestTy);
2012 _movp(T, Src0RM); 1960 _movp(T, Src0RM);
2013 _pand(T, OneMask); 1961 _pand(T, OneMask);
2014 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1962 Variable *Zeros = makeVectorOfZeros(DestTy);
2015 _pcmpgt(T, Zeros); 1963 _pcmpgt(T, Zeros);
2016 _movp(Dest, T); 1964 _movp(Dest, T);
2017 } else { 1965 } else {
2018 /// width = width(elty) - 1; dest = (src << width) >> width 1966 /// width = width(elty) - 1; dest = (src << width) >> width
2019 SizeT ShiftAmount = 1967 SizeT ShiftAmount =
2020 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1968 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2021 1; 1969 1;
2022 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1970 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2023 Variable *T = makeReg(DestTy); 1971 Variable *T = makeReg(DestTy);
2024 _movp(T, Src0RM); 1972 _movp(T, Src0RM);
2025 _psll(T, ShiftConstant); 1973 _psll(T, ShiftConstant);
2026 _psra(T, ShiftConstant); 1974 _psra(T, ShiftConstant);
2027 _movp(Dest, T); 1975 _movp(Dest, T);
2028 } 1976 }
2029 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1977 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2030 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1978 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2031 Constant *Shift = Ctx->getConstantInt32(31); 1979 Constant *Shift = Ctx->getConstantInt32(31);
2032 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1980 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2033 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1981 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2034 Variable *T_Lo = makeReg(DestLo->getType()); 1982 Variable *T_Lo = makeReg(DestLo->getType());
2035 if (Src0RM->getType() == IceType_i32) { 1983 if (Src0RM->getType() == IceType_i32) {
2036 _mov(T_Lo, Src0RM); 1984 _mov(T_Lo, Src0RM);
2037 } else if (Src0RM->getType() == IceType_i1) { 1985 } else if (Src0RM->getType() == IceType_i1) {
2038 _movzx(T_Lo, Src0RM); 1986 _movzx(T_Lo, Src0RM);
2039 _shl(T_Lo, Shift); 1987 _shl(T_Lo, Shift);
2040 _sar(T_Lo, Shift); 1988 _sar(T_Lo, Shift);
2041 } else { 1989 } else {
2042 _movsx(T_Lo, Src0RM); 1990 _movsx(T_Lo, Src0RM);
2043 } 1991 }
2044 _mov(DestLo, T_Lo); 1992 _mov(DestLo, T_Lo);
2045 Variable *T_Hi = nullptr; 1993 Variable *T_Hi = nullptr;
2046 _mov(T_Hi, T_Lo); 1994 _mov(T_Hi, T_Lo);
2047 if (Src0RM->getType() != IceType_i1) 1995 if (Src0RM->getType() != IceType_i1)
2048 // For i1, the sar instruction is already done above. 1996 // For i1, the sar instruction is already done above.
2049 _sar(T_Hi, Shift); 1997 _sar(T_Hi, Shift);
2050 _mov(DestHi, T_Hi); 1998 _mov(DestHi, T_Hi);
2051 } else if (Src0RM->getType() == IceType_i1) { 1999 } else if (Src0RM->getType() == IceType_i1) {
2052 // t1 = src 2000 // t1 = src
2053 // shl t1, dst_bitwidth - 1 2001 // shl t1, dst_bitwidth - 1
2054 // sar t1, dst_bitwidth - 1 2002 // sar t1, dst_bitwidth - 1
2055 // dst = t1 2003 // dst = t1
2056 size_t DestBits = 2004 size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
2057 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2058 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); 2005 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2059 Variable *T = makeReg(Dest->getType()); 2006 Variable *T = makeReg(DestTy);
2060 if (typeWidthInBytes(Dest->getType()) <= 2007 if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
2061 typeWidthInBytes(Src0RM->getType())) {
2062 _mov(T, Src0RM); 2008 _mov(T, Src0RM);
2063 } else { 2009 } else {
2064 // Widen the source using movsx or movzx. (It doesn't matter which one, 2010 // Widen the source using movsx or movzx. (It doesn't matter which one,
2065 // since the following shl/sar overwrite the bits.) 2011 // since the following shl/sar overwrite the bits.)
2066 _movzx(T, Src0RM); 2012 _movzx(T, Src0RM);
2067 } 2013 }
2068 _shl(T, ShiftAmount); 2014 _shl(T, ShiftAmount);
2069 _sar(T, ShiftAmount); 2015 _sar(T, ShiftAmount);
2070 _mov(Dest, T); 2016 _mov(Dest, T);
2071 } else { 2017 } else {
2072 // t1 = movsx src; dst = t1 2018 // t1 = movsx src; dst = t1
2073 Variable *T = makeReg(Dest->getType()); 2019 Variable *T = makeReg(DestTy);
2074 _movsx(T, Src0RM); 2020 _movsx(T, Src0RM);
2075 _mov(Dest, T); 2021 _mov(Dest, T);
2076 } 2022 }
2077 break; 2023 break;
2078 } 2024 }
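
For reference, here is a tiny C++ illustration of the shift-pair idiom used in the i1 and vector Sext cases above: shift the value into the top bit, then arithmetic-shift it back to smear the sign across the register (names are illustrative, not part of this CL).

#include <cstdint>

static int32_t sextI1ToI32(uint32_t Bit) { // Bit is 0 or 1
  int32_t T = int32_t(Bit << 31);          // shl t, 31
  return T >> 31;                          // sar t, 31: yields 0 or -1
}
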
2079 case InstCast::Zext: { 2025 case InstCast::Zext: {
2080 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2026 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2081 if (isVectorType(Dest->getType())) { 2027 if (isVectorType(DestTy)) {
2082 // onemask = materialize(1,1,...); dest = onemask & src 2028 // onemask = materialize(1,1,...); dest = onemask & src
2083 Type DestTy = Dest->getType();
2084 Variable *OneMask = makeVectorOfOnes(DestTy); 2029 Variable *OneMask = makeVectorOfOnes(DestTy);
2085 Variable *T = makeReg(DestTy); 2030 Variable *T = makeReg(DestTy);
2086 _movp(T, Src0RM); 2031 _movp(T, Src0RM);
2087 _pand(T, OneMask); 2032 _pand(T, OneMask);
2088 _movp(Dest, T); 2033 _movp(Dest, T);
2089 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2034 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2090 // t1=movzx src; dst.lo=t1; dst.hi=0 2035 // t1=movzx src; dst.lo=t1; dst.hi=0
2091 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2036 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2037 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2038 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2094 Variable *Tmp = makeReg(DestLo->getType()); 2039 Variable *Tmp = makeReg(DestLo->getType());
2095 if (Src0RM->getType() == IceType_i32) { 2040 if (Src0RM->getType() == IceType_i32) {
2096 _mov(Tmp, Src0RM); 2041 _mov(Tmp, Src0RM);
2097 } else { 2042 } else {
2098 _movzx(Tmp, Src0RM); 2043 _movzx(Tmp, Src0RM);
2099 } 2044 }
2100 _mov(DestLo, Tmp); 2045 _mov(DestLo, Tmp);
2101 _mov(DestHi, Zero); 2046 _mov(DestHi, Zero);
2102 } else if (Src0RM->getType() == IceType_i1) { 2047 } else if (Src0RM->getType() == IceType_i1) {
2103 // t = Src0RM; Dest = t 2048 // t = Src0RM; Dest = t
2104 Type DestTy = Dest->getType();
2105 Variable *T = nullptr; 2049 Variable *T = nullptr;
2106 if (DestTy == IceType_i8) { 2050 if (DestTy == IceType_i8) {
2107 _mov(T, Src0RM); 2051 _mov(T, Src0RM);
2108 } else { 2052 } else {
2109 assert(DestTy != IceType_i1); 2053 assert(DestTy != IceType_i1);
2110 assert(Traits::Is64Bit || DestTy != IceType_i64); 2054 assert(Traits::Is64Bit || DestTy != IceType_i64);
2111 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 2055 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2112 // In x86-64 we need to widen T to 64-bits to ensure that T -- if 2056 // In x86-64 we need to widen T to 64-bits to ensure that T -- if
2113 // written to the stack (i.e., in -Om1) will be fully zero-extended. 2057 // written to the stack (i.e., in -Om1) will be fully zero-extended.
2114 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32); 2058 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
2115 _movzx(T, Src0RM); 2059 _movzx(T, Src0RM);
2116 } 2060 }
2117 _mov(Dest, T); 2061 _mov(Dest, T);
2118 } else { 2062 } else {
2119 // t1 = movzx src; dst = t1 2063 // t1 = movzx src; dst = t1
2120 Variable *T = makeReg(Dest->getType()); 2064 Variable *T = makeReg(DestTy);
2121 _movzx(T, Src0RM); 2065 _movzx(T, Src0RM);
2122 _mov(Dest, T); 2066 _mov(Dest, T);
2123 } 2067 }
2124 break; 2068 break;
2125 } 2069 }
2126 case InstCast::Trunc: { 2070 case InstCast::Trunc: {
2127 if (isVectorType(Dest->getType())) { 2071 if (isVectorType(DestTy)) {
2128 // onemask = materialize(1,1,...); dst = src & onemask 2072 // onemask = materialize(1,1,...); dst = src & onemask
2129 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2073 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2130 Type Src0Ty = Src0RM->getType(); 2074 Type Src0Ty = Src0RM->getType();
2131 Variable *OneMask = makeVectorOfOnes(Src0Ty); 2075 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2132 Variable *T = makeReg(Dest->getType()); 2076 Variable *T = makeReg(DestTy);
2133 _movp(T, Src0RM); 2077 _movp(T, Src0RM);
2134 _pand(T, OneMask); 2078 _pand(T, OneMask);
2135 _movp(Dest, T); 2079 _movp(Dest, T);
2080 } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
2081 // Make sure we truncate from and into valid registers.
2082 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2083 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
2084 Src0 = loOperand(Src0);
2085 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2086 Variable *T = copyToReg8(Src0RM);
2087 if (DestTy == IceType_i1)
2088 _and(T, Ctx->getConstantInt1(1));
2089 _mov(Dest, T);
2136 } else { 2090 } else {
2137 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2091 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) 2092 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
2139 Src0 = loOperand(Src0); 2093 Src0 = loOperand(Src0);
2140 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2094 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2141 // t1 = trunc Src0RM; Dest = t1 2095 // t1 = trunc Src0RM; Dest = t1
2142 Variable *T = nullptr; 2096 Variable *T = makeReg(DestTy);
2143 _mov(T, Src0RM); 2097 _mov(T, Src0RM);
2144 if (Dest->getType() == IceType_i1)
2145 _and(T, Ctx->getConstantInt1(1));
2146 _mov(Dest, T); 2098 _mov(Dest, T);
2147 } 2099 }
2148 break; 2100 break;
2149 } 2101 }
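
For reference, the new i1/i8 Trunc path above appears to amount to copying into a register that has an 8-bit form and masking the low bit; a trivial C++ sketch, illustrative only and not part of this CL.

#include <cstdint>

static uint8_t truncI32ToI1(uint32_t Src) {
  uint8_t T = uint8_t(Src); // copyToReg8: move into an 8-bit-addressable register
  return T & 1;             // _and(T, 1): an i1 keeps only the low bit
}
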
2150 case InstCast::Fptrunc: 2102 case InstCast::Fptrunc:
2151 case InstCast::Fpext: { 2103 case InstCast::Fpext: {
2152 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2104 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2153 // t1 = cvt Src0RM; Dest = t1 2105 // t1 = cvt Src0RM; Dest = t1
2154 Variable *T = makeReg(Dest->getType()); 2106 Variable *T = makeReg(DestTy);
2155 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); 2107 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
2156 _mov(Dest, T); 2108 _mov(Dest, T);
2157 break; 2109 break;
2158 } 2110 }
2159 case InstCast::Fptosi: 2111 case InstCast::Fptosi:
2160 if (isVectorType(Dest->getType())) { 2112 if (isVectorType(DestTy)) {
2161 assert(Dest->getType() == IceType_v4i32 && 2113 assert(DestTy == IceType_v4i32 &&
2162 Inst->getSrc(0)->getType() == IceType_v4f32); 2114 Inst->getSrc(0)->getType() == IceType_v4f32);
2163 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2115 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2164 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2116 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2165 Src0RM = legalizeToReg(Src0RM); 2117 Src0RM = legalizeToReg(Src0RM);
2166 Variable *T = makeReg(Dest->getType()); 2118 Variable *T = makeReg(DestTy);
2167 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2119 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2168 _movp(Dest, T); 2120 _movp(Dest, T);
2169 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2121 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2170 constexpr SizeT MaxSrcs = 1; 2122 constexpr SizeT MaxSrcs = 1;
2171 Type SrcType = Inst->getSrc(0)->getType(); 2123 Type SrcType = Inst->getSrc(0)->getType();
2172 InstCall *Call = 2124 InstCall *Call =
2173 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2125 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2174 : H_fptosi_f64_i64, 2126 : H_fptosi_f64_i64,
2175 Dest, MaxSrcs); 2127 Dest, MaxSrcs);
2176 Call->addArg(Inst->getSrc(0)); 2128 Call->addArg(Inst->getSrc(0));
2177 lowerCall(Call); 2129 lowerCall(Call);
2178 } else { 2130 } else {
2179 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2131 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2180 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2132 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2181 Variable *T_1 = nullptr; 2133 Variable *T_1 = nullptr;
2182 if (Traits::Is64Bit && Dest->getType() == IceType_i64) { 2134 if (Traits::Is64Bit && DestTy == IceType_i64) {
2183 T_1 = makeReg(IceType_i64); 2135 T_1 = makeReg(IceType_i64);
2184 } else { 2136 } else {
2185 assert(Dest->getType() != IceType_i64); 2137 assert(DestTy != IceType_i64);
2186 T_1 = makeReg(IceType_i32); 2138 T_1 = makeReg(IceType_i32);
2187 } 2139 }
2188 // cvt() requires its integer argument to be a GPR. 2140 // cvt() requires its integer argument to be a GPR.
2189 Variable *T_2 = makeReg(Dest->getType()); 2141 Variable *T_2 = makeReg(DestTy);
2142 if (isByteSizedType(DestTy)) {
2143 assert(T_1->getType() == IceType_i32);
2144 T_1->setRegClass(RCX86_Is32To8);
2145 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2146 }
2190 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2147 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2191 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2148 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2192 if (Dest->getType() == IceType_i1) 2149 if (DestTy == IceType_i1)
2193 _and(T_2, Ctx->getConstantInt1(1)); 2150 _and(T_2, Ctx->getConstantInt1(1));
2194 _mov(Dest, T_2); 2151 _mov(Dest, T_2);
2195 } 2152 }
2196 break; 2153 break;
2197 case InstCast::Fptoui: 2154 case InstCast::Fptoui:
2198 if (isVectorType(Dest->getType())) { 2155 if (isVectorType(DestTy)) {
2199 assert(Dest->getType() == IceType_v4i32 && 2156 assert(DestTy == IceType_v4i32 &&
2200 Inst->getSrc(0)->getType() == IceType_v4f32); 2157 Inst->getSrc(0)->getType() == IceType_v4f32);
2201 constexpr SizeT MaxSrcs = 1; 2158 constexpr SizeT MaxSrcs = 1;
2202 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2159 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2203 Call->addArg(Inst->getSrc(0)); 2160 Call->addArg(Inst->getSrc(0));
2204 lowerCall(Call); 2161 lowerCall(Call);
2205 } else if (Dest->getType() == IceType_i64 || 2162 } else if (DestTy == IceType_i64 ||
2206 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { 2163 (!Traits::Is64Bit && DestTy == IceType_i32)) {
2207 // Use a helper for both x86-32 and x86-64. 2164 // Use a helper for both x86-32 and x86-64.
2208 constexpr SizeT MaxSrcs = 1; 2165 constexpr SizeT MaxSrcs = 1;
2209 Type DestType = Dest->getType();
2210 Type SrcType = Inst->getSrc(0)->getType(); 2166 Type SrcType = Inst->getSrc(0)->getType();
2211 IceString TargetString; 2167 IceString TargetString;
2212 if (Traits::Is64Bit) { 2168 if (Traits::Is64Bit) {
2213 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2169 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2214 : H_fptoui_f64_i64; 2170 : H_fptoui_f64_i64;
2215 } else if (isInt32Asserting32Or64(DestType)) { 2171 } else if (isInt32Asserting32Or64(DestTy)) {
2216 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2172 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2217 : H_fptoui_f64_i32; 2173 : H_fptoui_f64_i32;
2218 } else { 2174 } else {
2219 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2175 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2220 : H_fptoui_f64_i64; 2176 : H_fptoui_f64_i64;
2221 } 2177 }
2222 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2178 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2223 Call->addArg(Inst->getSrc(0)); 2179 Call->addArg(Inst->getSrc(0));
2224 lowerCall(Call); 2180 lowerCall(Call);
2225 return; 2181 return;
2226 } else { 2182 } else {
2227 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2183 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2228 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2184 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2229 assert(Dest->getType() != IceType_i64); 2185 assert(DestTy != IceType_i64);
2230 Variable *T_1 = nullptr; 2186 Variable *T_1 = nullptr;
2231 if (Traits::Is64Bit && Dest->getType() == IceType_i32) { 2187 if (Traits::Is64Bit && DestTy == IceType_i32) {
2232 T_1 = makeReg(IceType_i64); 2188 T_1 = makeReg(IceType_i64);
2233 } else { 2189 } else {
2234 assert(Dest->getType() != IceType_i32); 2190 assert(DestTy != IceType_i32);
2235 T_1 = makeReg(IceType_i32); 2191 T_1 = makeReg(IceType_i32);
2236 } 2192 }
2237 Variable *T_2 = makeReg(Dest->getType()); 2193 Variable *T_2 = makeReg(DestTy);
2194 if (isByteSizedType(DestTy)) {
2195 assert(T_1->getType() == IceType_i32);
2196 T_1->setRegClass(RCX86_Is32To8);
2197 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2198 }
2238 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2199 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2239 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2200 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2240 if (Dest->getType() == IceType_i1) 2201 if (DestTy == IceType_i1)
2241 _and(T_2, Ctx->getConstantInt1(1)); 2202 _and(T_2, Ctx->getConstantInt1(1));
2242 _mov(Dest, T_2); 2203 _mov(Dest, T_2);
2243 } 2204 }
2244 break; 2205 break;
2245 case InstCast::Sitofp: 2206 case InstCast::Sitofp:
2246 if (isVectorType(Dest->getType())) { 2207 if (isVectorType(DestTy)) {
2247 assert(Dest->getType() == IceType_v4f32 && 2208 assert(DestTy == IceType_v4f32 &&
2248 Inst->getSrc(0)->getType() == IceType_v4i32); 2209 Inst->getSrc(0)->getType() == IceType_v4i32);
2249 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2210 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2250 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2211 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2251 Src0RM = legalizeToReg(Src0RM); 2212 Src0RM = legalizeToReg(Src0RM);
2252 Variable *T = makeReg(Dest->getType()); 2213 Variable *T = makeReg(DestTy);
2253 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2214 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2254 _movp(Dest, T); 2215 _movp(Dest, T);
2255 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2216 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2256 // Use a helper for x86-32. 2217 // Use a helper for x86-32.
2257 constexpr SizeT MaxSrcs = 1; 2218 constexpr SizeT MaxSrcs = 1;
2258 Type DestType = Dest->getType();
2259 InstCall *Call = 2219 InstCall *Call =
2260 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2220 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
2261 : H_sitofp_i64_f64, 2221 : H_sitofp_i64_f64,
2262 Dest, MaxSrcs); 2222 Dest, MaxSrcs);
2263 // TODO: Call the correct compiler-rt helper function. 2223 // TODO: Call the correct compiler-rt helper function.
2264 Call->addArg(Inst->getSrc(0)); 2224 Call->addArg(Inst->getSrc(0));
2265 lowerCall(Call); 2225 lowerCall(Call);
2266 return; 2226 return;
2267 } else { 2227 } else {
2268 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2228 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2269 // Sign-extend the operand. 2229 // Sign-extend the operand.
2270 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2230 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2271 Variable *T_1 = nullptr; 2231 Variable *T_1 = nullptr;
2272 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { 2232 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2273 T_1 = makeReg(IceType_i64); 2233 T_1 = makeReg(IceType_i64);
2274 } else { 2234 } else {
2275 assert(Src0RM->getType() != IceType_i64); 2235 assert(Src0RM->getType() != IceType_i64);
2276 T_1 = makeReg(IceType_i32); 2236 T_1 = makeReg(IceType_i32);
2277 } 2237 }
2278 Variable *T_2 = makeReg(Dest->getType()); 2238 Variable *T_2 = makeReg(DestTy);
2279 if (Src0RM->getType() == T_1->getType()) 2239 if (Src0RM->getType() == T_1->getType())
2280 _mov(T_1, Src0RM); 2240 _mov(T_1, Src0RM);
2281 else 2241 else
2282 _movsx(T_1, Src0RM); 2242 _movsx(T_1, Src0RM);
2283 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2243 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2284 _mov(Dest, T_2); 2244 _mov(Dest, T_2);
2285 } 2245 }
2286 break; 2246 break;
2287 case InstCast::Uitofp: { 2247 case InstCast::Uitofp: {
2288 Operand *Src0 = Inst->getSrc(0); 2248 Operand *Src0 = Inst->getSrc(0);
2289 if (isVectorType(Src0->getType())) { 2249 if (isVectorType(Src0->getType())) {
2290 assert(Dest->getType() == IceType_v4f32 && 2250 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);
2291 Src0->getType() == IceType_v4i32);
2292 constexpr SizeT MaxSrcs = 1; 2251 constexpr SizeT MaxSrcs = 1;
2293 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2252 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2294 Call->addArg(Src0); 2253 Call->addArg(Src0);
2295 lowerCall(Call); 2254 lowerCall(Call);
2296 } else if (Src0->getType() == IceType_i64 || 2255 } else if (Src0->getType() == IceType_i64 ||
2297 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2256 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2298 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2257 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2299 // x86-32. 2258 // x86-32.
2300 constexpr SizeT MaxSrcs = 1; 2259 constexpr SizeT MaxSrcs = 1;
2301 Type DestType = Dest->getType();
2302 IceString TargetString; 2260 IceString TargetString;
2303 if (isInt32Asserting32Or64(Src0->getType())) { 2261 if (isInt32Asserting32Or64(Src0->getType())) {
2304 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2262 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
2305 : H_uitofp_i32_f64; 2263 : H_uitofp_i32_f64;
2306 } else { 2264 } else {
2307 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2265 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
2308 : H_uitofp_i64_f64; 2266 : H_uitofp_i64_f64;
2309 } 2267 }
2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2268 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2311 Call->addArg(Src0); 2269 Call->addArg(Src0);
2312 lowerCall(Call); 2270 lowerCall(Call);
2313 return; 2271 return;
2314 } else { 2272 } else {
2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2273 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2316 // Zero-extend the operand. 2274 // Zero-extend the operand.
2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2275 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2318 Variable *T_1 = nullptr; 2276 Variable *T_1 = nullptr;
2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { 2277 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2320 T_1 = makeReg(IceType_i64); 2278 T_1 = makeReg(IceType_i64);
2321 } else { 2279 } else {
2322 assert(Src0RM->getType() != IceType_i64); 2280 assert(Src0RM->getType() != IceType_i64);
2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); 2281 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2324 T_1 = makeReg(IceType_i32); 2282 T_1 = makeReg(IceType_i32);
2325 } 2283 }
2326 Variable *T_2 = makeReg(Dest->getType()); 2284 Variable *T_2 = makeReg(DestTy);
2327 if (Src0RM->getType() == T_1->getType()) 2285 if (Src0RM->getType() == T_1->getType())
2328 _mov(T_1, Src0RM); 2286 _mov(T_1, Src0RM);
2329 else 2287 else
2330 _movzx(T_1, Src0RM); 2288 _movzx(T_1, Src0RM);
2331 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2289 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2332 _mov(Dest, T_2); 2290 _mov(Dest, T_2);
2333 } 2291 }
2334 break; 2292 break;
2335 } 2293 }
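The non-helper scalar path above relies on a simple fact: zero-extending the unsigned source into a wider type makes it non-negative as a signed integer, so the signed cvtsi2ss/cvtsi2sd conversion yields the exact unsigned result. A minimal standalone sketch of that idea in plain C++, not Subzero lowering code (names are illustrative):

    #include <cstdint>

    // Sketch of "zero-extend, then signed convert" for uitofp on a 64-bit
    // target: a zero-extended uint32_t is always non-negative as an int64_t,
    // so the signed conversion (cvtsi2ss on x86-64) gives the unsigned result.
    float UitofpViaSignedConvert(uint32_t U) {
      int64_t Widened = static_cast<int64_t>(U); // zero-extension; Widened >= 0
      return static_cast<float>(Widened);        // signed int -> float convert
    }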
2336 case InstCast::Bitcast: { 2294 case InstCast::Bitcast: {
2337 Operand *Src0 = Inst->getSrc(0); 2295 Operand *Src0 = Inst->getSrc(0);
2338 if (Dest->getType() == Src0->getType()) { 2296 if (DestTy == Src0->getType()) {
2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2297 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2340 lowerAssign(Assign); 2298 lowerAssign(Assign);
2341 return; 2299 return;
2342 } 2300 }
2343 switch (Dest->getType()) { 2301 switch (DestTy) {
2344 default: 2302 default:
2345 llvm_unreachable("Unexpected Bitcast dest type"); 2303 llvm_unreachable("Unexpected Bitcast dest type");
2346 case IceType_i8: { 2304 case IceType_i8: {
2347 assert(Src0->getType() == IceType_v8i1); 2305 assert(Src0->getType() == IceType_v8i1);
2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); 2306 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
2349 Call->addArg(Src0); 2307 Call->addArg(Src0);
2350 lowerCall(Call); 2308 lowerCall(Call);
2351 } break; 2309 } break;
2352 case IceType_i16: { 2310 case IceType_i16: {
2353 assert(Src0->getType() == IceType_v16i1); 2311 assert(Src0->getType() == IceType_v16i1);
2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); 2312 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
2355 Call->addArg(Src0); 2313 Call->addArg(Src0);
2356 lowerCall(Call); 2314 lowerCall(Call);
2357 } break; 2315 } break;
2358 case IceType_i32: 2316 case IceType_i32:
2359 case IceType_f32: { 2317 case IceType_f32: {
2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2318 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2361 Type DestType = Dest->getType();
2362 Type SrcType = Src0RM->getType(); 2319 Type SrcType = Src0RM->getType();
2363 (void)DestType; 2320 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2364 assert((DestType == IceType_i32 && SrcType == IceType_f32) || 2321 (DestTy == IceType_f32 && SrcType == IceType_i32));
2365 (DestType == IceType_f32 && SrcType == IceType_i32));
2366 // a.i32 = bitcast b.f32 ==> 2322 // a.i32 = bitcast b.f32 ==>
2367 // t.f32 = b.f32 2323 // t.f32 = b.f32
2368 // s.f32 = spill t.f32 2324 // s.f32 = spill t.f32
2369 // a.i32 = s.f32 2325 // a.i32 = s.f32
2370 Variable *T = nullptr; 2326 Variable *T = nullptr;
2371 // TODO: Should be able to force a spill setup by calling legalize() with 2327 // TODO: Should be able to force a spill setup by calling legalize() with
2372 // Legal_Mem and not Legal_Reg or Legal_Imm. 2328 // Legal_Mem and not Legal_Reg or Legal_Imm.
2373 typename Traits::SpillVariable *SpillVar = 2329 typename Traits::SpillVariable *SpillVar =
2374 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2330 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2375 SpillVar->setLinkedTo(Dest); 2331 SpillVar->setLinkedTo(Dest);
(...skipping 53 matching lines...)
2429 if (Traits::Is64Bit) { 2385 if (Traits::Is64Bit) {
2430 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2386 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2431 Variable *T = makeReg(IceType_f64); 2387 Variable *T = makeReg(IceType_f64);
2432 // Movd requires its fp argument (in this case, the bitcast 2388 // Movd requires its fp argument (in this case, the bitcast
2433 // destination) to be an xmm register. 2389 // destination) to be an xmm register.
2434 _movd(T, Src0RM); 2390 _movd(T, Src0RM);
2435 _mov(Dest, T); 2391 _mov(Dest, T);
2436 } else { 2392 } else {
2437 Src0 = legalize(Src0); 2393 Src0 = legalize(Src0);
2438 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { 2394 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2439 Variable *T = Func->makeVariable(Dest->getType()); 2395 Variable *T = Func->makeVariable(DestTy);
2440 _movq(T, Src0); 2396 _movq(T, Src0);
2441 _movq(Dest, T); 2397 _movq(Dest, T);
2442 break; 2398 break;
2443 } 2399 }
2444 // a.f64 = bitcast b.i64 ==> 2400 // a.f64 = bitcast b.i64 ==>
2445 // t_lo.i32 = b_lo.i32 2401 // t_lo.i32 = b_lo.i32
2446 // FakeDef(s.f64) 2402 // FakeDef(s.f64)
2447 // lo(s.f64) = t_lo.i32 2403 // lo(s.f64) = t_lo.i32
2448 // t_hi.i32 = b_hi.i32 2404 // t_hi.i32 = b_hi.i32
2449 // hi(s.f64) = t_hi.i32 2405 // hi(s.f64) = t_hi.i32
(...skipping 580 matching lines...)
3030 2986
3031 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2987 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
3032 InstructionSet >= Traits::SSE4_1) { 2988 InstructionSet >= Traits::SSE4_1) {
3033 // Use insertps, pinsrb, pinsrw, or pinsrd. 2989 // Use insertps, pinsrb, pinsrw, or pinsrd.
3034 Operand *ElementRM = 2990 Operand *ElementRM =
3035 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2991 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3036 Operand *SourceVectRM = 2992 Operand *SourceVectRM =
3037 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2993 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3038 Variable *T = makeReg(Ty); 2994 Variable *T = makeReg(Ty);
3039 _movp(T, SourceVectRM); 2995 _movp(T, SourceVectRM);
3040 if (Ty == IceType_v4f32) 2996 if (Ty == IceType_v4f32) {
3041 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 2997 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3042 else 2998 } else {
3043 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source 2999 // For the pinsrb and pinsrw instructions, when the source operand is a
3044 // operand is a register, it must be a full r32 register like eax, and not 3000 // register, it must be a full r32 register like eax, and not ax/al/ah.
3045 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates 3001 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
3046 // for the use of r16 and r8 by converting them through getBaseReg(), 3002 // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
3047 // while emitIAS() validates that the original and base register encodings 3003 // validates that the original and base register encodings are the same.
3048 // are the same. But for an "interior" register like ah, it should 3004 if (ElementRM->getType() == IceType_i8 &&
3049 // probably be copied into an r32 via movzx so that the types work out. 3005 llvm::isa<Variable>(ElementRM)) {
3006 // Don't use ah/bh/ch/dh for pinsrb.
3007 ElementRM = copyToReg8(ElementRM);
3008 }
3050 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3009 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
3010 }
3051 _movp(Inst->getDest(), T); 3011 _movp(Inst->getDest(), T);
3052 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 3012 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3053 // Use shufps or movss. 3013 // Use shufps or movss.
3054 Variable *ElementR = nullptr; 3014 Variable *ElementR = nullptr;
3055 Operand *SourceVectRM = 3015 Operand *SourceVectRM =
3056 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3016 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3057 3017
3058 if (InVectorElementTy == IceType_f32) { 3018 if (InVectorElementTy == IceType_f32) {
3059 // ElementR will be in an XMM register since it is floating point. 3019 // ElementR will be in an XMM register since it is floating point.
3060 ElementR = legalizeToReg(ElementToInsertNotLegalized); 3020 ElementR = legalizeToReg(ElementToInsertNotLegalized);
(...skipping 2286 matching lines...)
5347 // TODO(wala,stichnot): lea should not 5307 // TODO(wala,stichnot): lea should not
5348 // be required. The address of the stack slot is known at compile time 5308 // be required. The address of the stack slot is known at compile time
5349 // (although not until after addProlog()). 5309 // (although not until after addProlog()).
5350 constexpr Type PointerType = IceType_i32; 5310 constexpr Type PointerType = IceType_i32;
5351 Variable *Loc = makeReg(PointerType); 5311 Variable *Loc = makeReg(PointerType);
5352 _lea(Loc, Slot); 5312 _lea(Loc, Slot);
5353 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5313 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5354 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5314 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5355 } 5315 }
5356 5316
5317 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
5318 /// Src is assumed to already be legalized. If the source operand is known to
5319 /// be a memory or immediate operand, a simple mov will suffice. But if the
5320 /// source operand can be a physical register, then it must first be copied into
5321 /// a physical register that is truncatable to 8 bits, then truncated into a
5322 /// physical register that can receive a truncation, and finally copied into the
5323 /// result 8-bit register (which in general can be any 8-bit register). For
5324 /// example, moving %ebp into %ah may be accomplished as:
5325 /// movl %ebp, %edx
5326 /// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
5327 /// movb %dl, %ah
5328 /// On the other hand, moving a memory or immediate operand into ah:
5329 /// movb 4(%ebp), %ah
5330 /// movb $my_imm, %ah
5331 ///
5332 /// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
5333 /// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
5334 /// use RegNum=NoRegister and then let the caller do a separate copy into
5335 /// Reg_ah.
5336 ///
5337 /// Note #2. ConstantRelocatable operands are also put through this process
5338 /// (not truncated directly) because our ELF emitter does R_386_32 relocations
5339 /// but not R_386_8 relocations.
5340 ///
5341 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8
5342 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
5343 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
5344 /// to the pinsrb instruction.
5345 template <class Machine>
5346 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
5347 Type Ty = Src->getType();
5348 assert(isScalarIntegerType(Ty));
5349 assert(Ty != IceType_i1);
5350 Variable *Reg = makeReg(IceType_i8, RegNum);
5351 Reg->setRegClass(RCX86_IsTrunc8Rcvr);
5352 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
5353 Variable *SrcTruncable = makeReg(Ty);
5354 switch (Ty) {
5355 case IceType_i64:
5356 SrcTruncable->setRegClass(RCX86_Is64To8);
5357 break;
5358 case IceType_i32:
5359 SrcTruncable->setRegClass(RCX86_Is32To8);
5360 break;
5361 case IceType_i16:
5362 SrcTruncable->setRegClass(RCX86_Is16To8);
5363 break;
5364 default:
5365 // i8 - just use default register class
5366 break;
5367 }
5368 Variable *SrcRcvr = makeReg(IceType_i8);
5369 SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
5370 _mov(SrcTruncable, Src);
5371 _mov(SrcRcvr, SrcTruncable);
5372 Src = SrcRcvr;
5373 }
5374 _mov(Reg, Src);
5375 return Reg;
5376 }
5377
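As background for the extra copies above: on x86-32 only eax, ecx, edx, and ebx have addressable low-8-bit aliases, so a value sitting in %esp, %ebp, %esi, or %edi cannot be truncated to 8 bits in place. A standalone sketch of that constraint (illustrative only, not Subzero code):

    #include <cstring>

    // Illustrative check: which 32-bit GPRs have a low-8-bit alias
    // (al/cl/dl/bl) on x86-32. esp/ebp/esi/edi do not, which is what forces
    // copyToReg8() to route such values through a truncatable register first.
    bool HasLow8BitAlias(const char *Gpr32) {
      return std::strcmp(Gpr32, "eax") == 0 || std::strcmp(Gpr32, "ecx") == 0 ||
             std::strcmp(Gpr32, "edx") == 0 || std::strcmp(Gpr32, "ebx") == 0;
    }

Expressing the constraint through register classes (RCX86_Is32To8, RCX86_IsTrunc8Rcvr, and so on) lets the register allocator choose any legal register rather than hard-coding one of the four byte-addressable GPRs.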
5357 /// Helper for legalize() to emit the right code to lower an operand to a 5378 /// Helper for legalize() to emit the right code to lower an operand to a
5358 /// register of the appropriate type. 5379 /// register of the appropriate type.
5359 template <class Machine> 5380 template <class Machine>
5360 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5381 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5361 Type Ty = Src->getType(); 5382 Type Ty = Src->getType();
5362 Variable *Reg = makeReg(Ty, RegNum); 5383 Variable *Reg = makeReg(Ty, RegNum);
5363 if (isVectorType(Ty)) { 5384 if (isVectorType(Ty)) {
5364 _movp(Reg, Src); 5385 _movp(Reg, Src);
5365 } else { 5386 } else {
5366 _mov(Reg, Src); 5387 _mov(Reg, Src);
(...skipping 493 matching lines...)
5860 } 5881 }
5861 // the offset is not eligible for blinding or pooling, return the original 5882 // the offset is not eligible for blinding or pooling, return the original
5862 // mem operand 5883 // mem operand
5863 return MemOperand; 5884 return MemOperand;
5864 } 5885 }
5865 5886
5866 } // end of namespace X86Internal 5887 } // end of namespace X86Internal
5867 } // end of namespace Ice 5888 } // end of namespace Ice
5868 5889
5869 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5890 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H