Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1427973003: Subzero: Refactor x86 register representation to actively use aliases. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Reformat | Created 5 years, 1 month ago
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1198 matching lines...)
1209 } 1209 }
1210 } else { 1210 } else {
1211 // NON-CONSTANT CASES. 1211 // NON-CONSTANT CASES.
1212 Constant *BitTest = Ctx->getConstantInt32(0x20); 1212 Constant *BitTest = Ctx->getConstantInt32(0x20);
1213 typename Traits::Insts::Label *Label = 1213 typename Traits::Insts::Label *Label =
1214 Traits::Insts::Label::create(Func, this); 1214 Traits::Insts::Label::create(Func, this);
1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> 1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==>
1216 // t1:ecx = c.lo & 0xff 1216 // t1:ecx = c.lo & 0xff
1217 // t2 = b.lo 1217 // t2 = b.lo
1218 // t3 = b.hi 1218 // t3 = b.hi
1219 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1219 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl);
1220 _mov(T_1, Src1Lo);
1221 _mov(T_2, Src0Lo); 1220 _mov(T_2, Src0Lo);
1222 _mov(T_3, Src0Hi); 1221 _mov(T_3, Src0Hi);
1223 switch (Op) { 1222 switch (Op) {
1224 default: 1223 default:
1225 assert(0 && "non-shift op"); 1224 assert(0 && "non-shift op");
1226 break; 1225 break;
1227 case InstArithmetic::Shl: { 1226 case InstArithmetic::Shl: {
1228 // a=b<<c ==> 1227 // a=b<<c ==>
1229 // t3 = shld t3, t2, t1 1228 // t3 = shld t3, t2, t1
1230 // t2 = shl t2, t1 1229 // t2 = shl t2, t1
(...skipping 57 matching lines...)
1288 // a.hi = t3 1287 // a.hi = t3
1289 Context.insert(Label); 1288 Context.insert(Label);
1290 _mov(DestLo, T_2); 1289 _mov(DestLo, T_2);
1291 _mov(DestHi, T_3); 1290 _mov(DestHi, T_3);
1292 } 1291 }
1293 } 1292 }
1294 1293
1295 template <class Machine> 1294 template <class Machine>
1296 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { 1295 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
1297 Variable *Dest = Inst->getDest(); 1296 Variable *Dest = Inst->getDest();
1297 Type Ty = Dest->getType();
1298 Operand *Src0 = legalize(Inst->getSrc(0)); 1298 Operand *Src0 = legalize(Inst->getSrc(0));
1299 Operand *Src1 = legalize(Inst->getSrc(1)); 1299 Operand *Src1 = legalize(Inst->getSrc(1));
1300 if (Inst->isCommutative()) { 1300 if (Inst->isCommutative()) {
1301 uint32_t SwapCount = 0; 1301 uint32_t SwapCount = 0;
1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { 1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) {
1303 std::swap(Src0, Src1); 1303 std::swap(Src0, Src1);
1304 ++SwapCount; 1304 ++SwapCount;
1305 } 1305 }
1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) { 1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) {
1307 std::swap(Src0, Src1); 1307 std::swap(Src0, Src1);
1308 ++SwapCount; 1308 ++SwapCount;
1309 } 1309 }
1310 // Improve two-address code patterns by avoiding a copy to the dest 1310 // Improve two-address code patterns by avoiding a copy to the dest
1311 // register when one of the source operands ends its lifetime here. 1311 // register when one of the source operands ends its lifetime here.
1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) { 1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) {
1313 std::swap(Src0, Src1); 1313 std::swap(Src0, Src1);
1314 ++SwapCount; 1314 ++SwapCount;
1315 } 1315 }
1316 assert(SwapCount <= 1); 1316 assert(SwapCount <= 1);
1317 (void)SwapCount; 1317 (void)SwapCount;
1318 } 1318 }
1319 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1319 if (!Traits::Is64Bit && Ty == IceType_i64) {
1320 // These x86-32 helper-call-involved instructions are lowered in this 1320 // These x86-32 helper-call-involved instructions are lowered in this
1321 // separate switch. This is because loOperand() and hiOperand() may insert 1321 // separate switch. This is because loOperand() and hiOperand() may insert
1322 // redundant instructions for constant blinding and pooling. Such redundant 1322 // redundant instructions for constant blinding and pooling. Such redundant
1323 // instructions will fail liveness analysis under -Om1 setting. And, 1323 // instructions will fail liveness analysis under -Om1 setting. And,
1324 // actually these arguments do not need to be processed with loOperand() 1324 // actually these arguments do not need to be processed with loOperand()
1325 // and hiOperand() to be used. 1325 // and hiOperand() to be used.
1326 switch (Inst->getOp()) { 1326 switch (Inst->getOp()) {
1327 case InstArithmetic::Udiv: { 1327 case InstArithmetic::Udiv: {
1328 constexpr SizeT MaxSrcs = 2; 1328 constexpr SizeT MaxSrcs = 2;
1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
(...skipping 126 matching lines...)
1456 case InstArithmetic::Udiv: 1456 case InstArithmetic::Udiv:
1457 case InstArithmetic::Sdiv: 1457 case InstArithmetic::Sdiv:
1458 case InstArithmetic::Urem: 1458 case InstArithmetic::Urem:
1459 case InstArithmetic::Srem: 1459 case InstArithmetic::Srem:
1460 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1460 llvm_unreachable("Call-helper-involved instruction for i64 type \
1461 should have already been handled before"); 1461 should have already been handled before");
1462 break; 1462 break;
1463 } 1463 }
1464 return; 1464 return;
1465 } 1465 }
1466 if (isVectorType(Dest->getType())) { 1466 if (isVectorType(Ty)) {
1467 // TODO: Trap on integer divide and integer modulo by zero. See: 1467 // TODO: Trap on integer divide and integer modulo by zero. See:
1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899 1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899
1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) 1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
1470 Src1 = legalizeToReg(Src1); 1470 Src1 = legalizeToReg(Src1);
1471 switch (Inst->getOp()) { 1471 switch (Inst->getOp()) {
1472 case InstArithmetic::_num: 1472 case InstArithmetic::_num:
1473 llvm_unreachable("Unknown arithmetic operator"); 1473 llvm_unreachable("Unknown arithmetic operator");
1474 break; 1474 break;
1475 case InstArithmetic::Add: { 1475 case InstArithmetic::Add: {
1476 Variable *T = makeReg(Dest->getType()); 1476 Variable *T = makeReg(Ty);
1477 _movp(T, Src0); 1477 _movp(T, Src0);
1478 _padd(T, Src1); 1478 _padd(T, Src1);
1479 _movp(Dest, T); 1479 _movp(Dest, T);
1480 } break; 1480 } break;
1481 case InstArithmetic::And: { 1481 case InstArithmetic::And: {
1482 Variable *T = makeReg(Dest->getType()); 1482 Variable *T = makeReg(Ty);
1483 _movp(T, Src0); 1483 _movp(T, Src0);
1484 _pand(T, Src1); 1484 _pand(T, Src1);
1485 _movp(Dest, T); 1485 _movp(Dest, T);
1486 } break; 1486 } break;
1487 case InstArithmetic::Or: { 1487 case InstArithmetic::Or: {
1488 Variable *T = makeReg(Dest->getType()); 1488 Variable *T = makeReg(Ty);
1489 _movp(T, Src0); 1489 _movp(T, Src0);
1490 _por(T, Src1); 1490 _por(T, Src1);
1491 _movp(Dest, T); 1491 _movp(Dest, T);
1492 } break; 1492 } break;
1493 case InstArithmetic::Xor: { 1493 case InstArithmetic::Xor: {
1494 Variable *T = makeReg(Dest->getType()); 1494 Variable *T = makeReg(Ty);
1495 _movp(T, Src0); 1495 _movp(T, Src0);
1496 _pxor(T, Src1); 1496 _pxor(T, Src1);
1497 _movp(Dest, T); 1497 _movp(Dest, T);
1498 } break; 1498 } break;
1499 case InstArithmetic::Sub: { 1499 case InstArithmetic::Sub: {
1500 Variable *T = makeReg(Dest->getType()); 1500 Variable *T = makeReg(Ty);
1501 _movp(T, Src0); 1501 _movp(T, Src0);
1502 _psub(T, Src1); 1502 _psub(T, Src1);
1503 _movp(Dest, T); 1503 _movp(Dest, T);
1504 } break; 1504 } break;
1505 case InstArithmetic::Mul: { 1505 case InstArithmetic::Mul: {
1506 bool TypesAreValidForPmull = 1506 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16;
1507 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1508 bool InstructionSetIsValidForPmull = 1507 bool InstructionSetIsValidForPmull =
1509 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; 1508 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1;
1510 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1509 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1511 Variable *T = makeReg(Dest->getType()); 1510 Variable *T = makeReg(Ty);
1512 _movp(T, Src0); 1511 _movp(T, Src0);
1513 _pmull(T, Src0 == Src1 ? T : Src1); 1512 _pmull(T, Src0 == Src1 ? T : Src1);
1514 _movp(Dest, T); 1513 _movp(Dest, T);
1515 } else if (Dest->getType() == IceType_v4i32) { 1514 } else if (Ty == IceType_v4i32) {
1516 // Lowering sequence: 1515 // Lowering sequence:
1517 // Note: The mask arguments have index 0 on the left. 1516 // Note: The mask arguments have index 0 on the left.
1518 // 1517 //
1519 // movups T1, Src0 1518 // movups T1, Src0
1520 // pshufd T2, Src0, {1,0,3,0} 1519 // pshufd T2, Src0, {1,0,3,0}
1521 // pshufd T3, Src1, {1,0,3,0} 1520 // pshufd T3, Src1, {1,0,3,0}
1522 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1521 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1523 // pmuludq T1, Src1 1522 // pmuludq T1, Src1
1524 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1525 // pmuludq T2, T3 1524 // pmuludq T2, T3
(...skipping 17 matching lines...)
1543 Variable *T3 = makeReg(IceType_v4i32); 1542 Variable *T3 = makeReg(IceType_v4i32);
1544 Variable *T4 = makeReg(IceType_v4i32); 1543 Variable *T4 = makeReg(IceType_v4i32);
1545 _movp(T1, Src0); 1544 _movp(T1, Src0);
1546 _pshufd(T2, Src0, Mask1030); 1545 _pshufd(T2, Src0, Mask1030);
1547 _pshufd(T3, Src1, Mask1030); 1546 _pshufd(T3, Src1, Mask1030);
1548 _pmuludq(T1, Src1); 1547 _pmuludq(T1, Src1);
1549 _pmuludq(T2, T3); 1548 _pmuludq(T2, T3);
1550 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); 1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1551 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); 1550 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1552 _movp(Dest, T4); 1551 _movp(Dest, T4);
1553 } else if (Dest->getType() == IceType_v16i8) { 1552 } else if (Ty == IceType_v16i8) {
1554 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1553 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1555 } else { 1554 } else {
1556 llvm::report_fatal_error("Invalid vector multiply type"); 1555 llvm::report_fatal_error("Invalid vector multiply type");
1557 } 1556 }
1558 } break; 1557 } break;
1559 case InstArithmetic::Shl: 1558 case InstArithmetic::Shl:
1560 case InstArithmetic::Lshr: 1559 case InstArithmetic::Lshr:
1561 case InstArithmetic::Ashr: 1560 case InstArithmetic::Ashr:
1562 case InstArithmetic::Udiv: 1561 case InstArithmetic::Udiv:
1563 case InstArithmetic::Urem: 1562 case InstArithmetic::Urem:
1564 case InstArithmetic::Sdiv: 1563 case InstArithmetic::Sdiv:
1565 case InstArithmetic::Srem: 1564 case InstArithmetic::Srem:
1566 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1565 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1567 break; 1566 break;
1568 case InstArithmetic::Fadd: { 1567 case InstArithmetic::Fadd: {
1569 Variable *T = makeReg(Dest->getType()); 1568 Variable *T = makeReg(Ty);
1570 _movp(T, Src0); 1569 _movp(T, Src0);
1571 _addps(T, Src1); 1570 _addps(T, Src1);
1572 _movp(Dest, T); 1571 _movp(Dest, T);
1573 } break; 1572 } break;
1574 case InstArithmetic::Fsub: { 1573 case InstArithmetic::Fsub: {
1575 Variable *T = makeReg(Dest->getType()); 1574 Variable *T = makeReg(Ty);
1576 _movp(T, Src0); 1575 _movp(T, Src0);
1577 _subps(T, Src1); 1576 _subps(T, Src1);
1578 _movp(Dest, T); 1577 _movp(Dest, T);
1579 } break; 1578 } break;
1580 case InstArithmetic::Fmul: { 1579 case InstArithmetic::Fmul: {
1581 Variable *T = makeReg(Dest->getType()); 1580 Variable *T = makeReg(Ty);
1582 _movp(T, Src0); 1581 _movp(T, Src0);
1583 _mulps(T, Src0 == Src1 ? T : Src1); 1582 _mulps(T, Src0 == Src1 ? T : Src1);
1584 _movp(Dest, T); 1583 _movp(Dest, T);
1585 } break; 1584 } break;
1586 case InstArithmetic::Fdiv: { 1585 case InstArithmetic::Fdiv: {
1587 Variable *T = makeReg(Dest->getType()); 1586 Variable *T = makeReg(Ty);
1588 _movp(T, Src0); 1587 _movp(T, Src0);
1589 _divps(T, Src1); 1588 _divps(T, Src1);
1590 _movp(Dest, T); 1589 _movp(Dest, T);
1591 } break; 1590 } break;
1592 case InstArithmetic::Frem: 1591 case InstArithmetic::Frem:
1593 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1592 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1594 break; 1593 break;
1595 } 1594 }
1596 return; 1595 return;
1597 } 1596 }
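
As a side note, here is a scalar C++ sketch of why the pmuludq/pshufd/shufps sequence in the IceType_v4i32 multiply case above yields the element-wise low 32 bits of each product; the array-based emulation and its names are illustrative only, not part of this CL.

#include <array>
#include <cstdint>

// pmuludq multiplies the even lanes as 64-bit products, so the odd lanes are
// first moved into even positions with pshufd, and shufps/pshufd reassemble
// the low halves in the original order (mask index 0 is the leftmost element).
static std::array<uint32_t, 4> mulV4i32(const std::array<uint32_t, 4> &A,
                                        const std::array<uint32_t, 4> &B) {
  // pmuludq T1, Src1                 -> {A[0]*B[0], A[2]*B[2]} as 64-bit lanes
  uint64_t Even0 = uint64_t(A[0]) * B[0];
  uint64_t Even2 = uint64_t(A[2]) * B[2];
  // pshufd {1,0,3,0} on both sources, then pmuludq T2, T3
  //                                  -> {A[1]*B[1], A[3]*B[3]} as 64-bit lanes
  uint64_t Odd1 = uint64_t(A[1]) * B[1];
  uint64_t Odd3 = uint64_t(A[3]) * B[3];
  // shufps T1, T2, {0,2,0,2} then pshufd T4, T1, {0,2,1,3}: keep the low 32
  // bits of each product and restore the original lane order.
  return {uint32_t(Even0), uint32_t(Odd1), uint32_t(Even2), uint32_t(Odd3)};
}
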
(...skipping 28 matching lines...)
1626 _sub(T, Src1); 1625 _sub(T, Src1);
1627 _mov(Dest, T); 1626 _mov(Dest, T);
1628 break; 1627 break;
1629 case InstArithmetic::Mul: 1628 case InstArithmetic::Mul:
1630 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1631 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1630 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1632 return; 1631 return;
1633 } 1632 }
1634 // The 8-bit version of imul only allows the form "imul r/m8" where T must 1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1635 // be in al. 1634 // be in al.
1636 if (isByteSizedArithType(Dest->getType())) { 1635 if (isByteSizedArithType(Ty)) {
1637 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1636 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1638 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1639 _imul(T, Src0 == Src1 ? T : Src1); 1638 _imul(T, Src0 == Src1 ? T : Src1);
1640 _mov(Dest, T); 1639 _mov(Dest, T);
1641 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1642 T = makeReg(Dest->getType()); 1641 T = makeReg(Ty);
1643 _imul_imm(T, Src0, ImmConst); 1642 _imul_imm(T, Src0, ImmConst);
1644 _mov(Dest, T); 1643 _mov(Dest, T);
1645 } else { 1644 } else {
1646 _mov(T, Src0); 1645 _mov(T, Src0);
1647 _imul(T, Src0 == Src1 ? T : Src1); 1646 _imul(T, Src0 == Src1 ? T : Src1);
1648 _mov(Dest, T); 1647 _mov(Dest, T);
1649 } 1648 }
1650 break; 1649 break;
1651 case InstArithmetic::Shl: 1650 case InstArithmetic::Shl:
1652 _mov(T, Src0); 1651 _mov(T, Src0);
1653 if (!llvm::isa<ConstantInteger32>(Src1)) { 1652 if (!llvm::isa<ConstantInteger32>(Src1))
1654 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1653 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1655 _mov(Cl, Src1);
1656 Src1 = Cl;
1657 }
1658 _shl(T, Src1); 1654 _shl(T, Src1);
1659 _mov(Dest, T); 1655 _mov(Dest, T);
1660 break; 1656 break;
1661 case InstArithmetic::Lshr: 1657 case InstArithmetic::Lshr:
1662 _mov(T, Src0); 1658 _mov(T, Src0);
1663 if (!llvm::isa<ConstantInteger32>(Src1)) { 1659 if (!llvm::isa<ConstantInteger32>(Src1))
1664 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1660 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1665 _mov(Cl, Src1);
1666 Src1 = Cl;
1667 }
1668 _shr(T, Src1); 1661 _shr(T, Src1);
1669 _mov(Dest, T); 1662 _mov(Dest, T);
1670 break; 1663 break;
1671 case InstArithmetic::Ashr: 1664 case InstArithmetic::Ashr:
1672 _mov(T, Src0); 1665 _mov(T, Src0);
1673 if (!llvm::isa<ConstantInteger32>(Src1)) { 1666 if (!llvm::isa<ConstantInteger32>(Src1))
1674 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); 1667 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1675 _mov(Cl, Src1);
1676 Src1 = Cl;
1677 }
1678 _sar(T, Src1); 1668 _sar(T, Src1);
1679 _mov(Dest, T); 1669 _mov(Dest, T);
1680 break; 1670 break;
1681 case InstArithmetic::Udiv: 1671 case InstArithmetic::Udiv: {
1682 // div and idiv are the few arithmetic operators that do not allow 1672 // div and idiv are the few arithmetic operators that do not allow
1683 // immediates as the operand. 1673 // immediates as the operand.
1684 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1685 if (isByteSizedArithType(Dest->getType())) { 1675 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1686 // For 8-bit unsigned division we need to zero-extend al into ah. A mov 1676 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1687 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 1677 switch (Ty) {
1688 // assembler refuses to encode %ah (encoding %spl with a REX prefix 1678 default:
1689 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah 1679 llvm_unreachable("Bad type for udiv");
1690 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and 1680 // fallthrough
1691 // d[%lh], which means the X86 target lowering (and the register 1681 case IceType_i32:
1692 // allocator) would have to be aware of this restriction. For now, we 1682 break;
1693 // simply zero %eax completely, and move the dividend into %al. 1683 case IceType_i16:
1694 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1684 Eax = Traits::RegisterSet::Reg_ax;
1695 Context.insert(InstFakeDef::create(Func, T_eax)); 1685 Edx = Traits::RegisterSet::Reg_dx;
1696 _xor(T_eax, T_eax); 1686 break;
1697 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1687 case IceType_i8:
1698 _div(T, Src1, T); 1688 Eax = Traits::RegisterSet::Reg_al;
1699 _mov(Dest, T); 1689 Edx = Traits::RegisterSet::Reg_ah;
1700 Context.insert(InstFakeUse::create(Func, T_eax)); 1690 break;
1701 } else {
1702 Type Ty = Dest->getType();
1703 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1704 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1705 switch (Ty) {
1706 default:
1707 llvm_unreachable("Bad type for udiv");
1708 // fallthrough
1709 case IceType_i32:
1710 break;
1711 case IceType_i16:
1712 Eax = Traits::RegisterSet::Reg_ax;
1713 Edx = Traits::RegisterSet::Reg_dx;
1714 break;
1715 }
1716 Constant *Zero = Ctx->getConstantZero(Ty);
1717 _mov(T, Src0, Eax);
1718 _mov(T_edx, Zero, Edx);
1719 _div(T, Src1, T_edx);
1720 _mov(Dest, T);
1721 } 1691 }
1722 break; 1692 _mov(T, Src0, Eax);
1693 _mov(T_edx, Ctx->getConstantZero(Ty), Edx);
1694 _div(T, Src1, T_edx);
1695 _mov(Dest, T);
1696 } break;
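
For context, x86 div divides the register pair edx:eax (dx:ax, or ah:al for the 8-bit form) by its operand, which is why the lowering zeroes the high half first; a small C++ illustration of the 32-bit case, not part of this CL.

#include <cstdint>

// T_edx = 0, T = Src0 in eax, then "div Src1" leaves the quotient in eax
// (the Dest) and the remainder in edx. Src1 == 0 faults, as the hardware does.
static void udiv32(uint32_t Src0, uint32_t Src1, uint32_t &Quotient,
                   uint32_t &Remainder) {
  uint32_t Edx = 0;                                 // _mov(T_edx, Zero, Edx)
  uint64_t Dividend = (uint64_t(Edx) << 32) | Src0; // edx:eax
  Quotient = uint32_t(Dividend / Src1);             // -> eax
  Remainder = uint32_t(Dividend % Src1);            // -> edx
}
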
1723 case InstArithmetic::Sdiv: 1697 case InstArithmetic::Sdiv:
1724 // TODO(stichnot): Enable this after doing better performance and cross 1698 // TODO(stichnot): Enable this after doing better performance and cross
1725 // testing. 1699 // testing.
1726 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1700 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1727 // Optimize division by constant power of 2, but not for Om1 or O0, just 1701 // Optimize division by constant power of 2, but not for Om1 or O0, just
1728 // to keep things simple there. 1702 // to keep things simple there.
1729 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1703 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1730 int32_t Divisor = C->getValue(); 1704 int32_t Divisor = C->getValue();
1731 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1705 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1732 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1706 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1733 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1707 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1734 Type Ty = Dest->getType();
1735 // LLVM does the following for dest=src/(1<<log): 1708 // LLVM does the following for dest=src/(1<<log):
1736 // t=src 1709 // t=src
1737 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1710 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1738 // shr t,typewidth-log 1711 // shr t,typewidth-log
1739 // add t,src 1712 // add t,src
1740 // sar t,log 1713 // sar t,log
1741 // dest=t 1714 // dest=t
1742 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 1715 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
1743 _mov(T, Src0); 1716 _mov(T, Src0);
1744 // If for some reason we are dividing by 1, just treat it like an 1717 // If for some reason we are dividing by 1, just treat it like an
1745 // assignment. 1718 // assignment.
1746 if (LogDiv > 0) { 1719 if (LogDiv > 0) {
1747 // The initial sar is unnecessary when dividing by 2. 1720 // The initial sar is unnecessary when dividing by 2.
1748 if (LogDiv > 1) 1721 if (LogDiv > 1)
1749 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 1722 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1750 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1723 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1751 _add(T, Src0); 1724 _add(T, Src0);
1752 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1725 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1753 } 1726 }
1754 _mov(Dest, T); 1727 _mov(Dest, T);
1755 return; 1728 return;
1756 } 1729 }
1757 } 1730 }
1758 } 1731 }
1759 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1732 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1760 switch (Type Ty = Dest->getType()) { 1733 switch (Ty) {
1761 default: 1734 default:
1762 llvm_unreachable("Bad type for sdiv"); 1735 llvm_unreachable("Bad type for sdiv");
1763 // fallthrough 1736 // fallthrough
1764 case IceType_i32: 1737 case IceType_i32:
1765 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); 1738 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1766 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1739 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1767 break; 1740 break;
1768 case IceType_i16: 1741 case IceType_i16:
1769 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1742 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1770 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1743 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1771 break; 1744 break;
1772 case IceType_i8: 1745 case IceType_i8:
1773 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1746 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1774 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1747 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1775 break; 1748 break;
1776 } 1749 }
1777 _cbwdq(T_edx, T); 1750 _cbwdq(T_edx, T);
1778 _idiv(T, Src1, T_edx); 1751 _idiv(T, Src1, T_edx);
1779 _mov(Dest, T); 1752 _mov(Dest, T);
1780 break; 1753 break;
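
For reference, the commented sar/shr/add/sar sequence computes a truncating (round-toward-zero) signed division by 1<<log; here is a C++ model for 32-bit operands with log in 1..31, illustrative only and not part of this CL.

#include <cstdint>

static int32_t sdivPow2(int32_t Src, uint32_t Log) {
  int32_t T = Src;
  if (Log > 1)
    T >>= 31;                             // sar t, 31: all-ones if Src < 0, else 0
  T = int32_t(uint32_t(T) >> (32 - Log)); // shr t, 32-log: bias of (1<<log)-1 for negatives
  T += Src;                               // add t, src
  return T >> Log;                        // sar t, log (arithmetic shift)
}
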
1781 case InstArithmetic::Urem: 1754 case InstArithmetic::Urem: {
1782 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1755 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1783 if (isByteSizedArithType(Dest->getType())) { 1756 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1784 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 1757 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1785 Context.insert(InstFakeDef::create(Func, T_eax)); 1758 switch (Ty) {
1786 _xor(T_eax, T_eax); 1759 default:
1787 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1760 llvm_unreachable("Bad type for urem");
1788 _div(T, Src1, T); 1761 // fallthrough
1789 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't 1762 case IceType_i32:
1790 // mov %ah, %al because it would make x86-64 codegen more complicated. If 1763 break;
1791 // this ever becomes a problem we can introduce a pseudo rem instruction 1764 case IceType_i16:
1792 // that returns the remainder in %al directly (and uses a mov for copying 1765 Eax = Traits::RegisterSet::Reg_ax;
1793 // %ah to %al.) 1766 Edx = Traits::RegisterSet::Reg_dx;
1794 static constexpr uint8_t AlSizeInBits = 8; 1767 break;
1795 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); 1768 case IceType_i8:
1796 _mov(Dest, T); 1769 Eax = Traits::RegisterSet::Reg_al;
1797 Context.insert(InstFakeUse::create(Func, T_eax)); 1770 Edx = Traits::RegisterSet::Reg_ah;
1798 } else { 1771 break;
1799 Type Ty = Dest->getType();
1800 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1801 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1802 switch (Ty) {
1803 default:
1804 llvm_unreachable("Bad type for urem");
1805 // fallthrough
1806 case IceType_i32:
1807 break;
1808 case IceType_i16:
1809 Eax = Traits::RegisterSet::Reg_ax;
1810 Edx = Traits::RegisterSet::Reg_dx;
1811 break;
1812 }
1813 Constant *Zero = Ctx->getConstantZero(Ty);
1814 T_edx = makeReg(Dest->getType(), Edx);
1815 _mov(T_edx, Zero);
1816 _mov(T, Src0, Eax);
1817 _div(T_edx, Src1, T);
1818 _mov(Dest, T_edx);
1819 } 1772 }
1820 break; 1773 T_edx = makeReg(Ty, Edx);
1821 case InstArithmetic::Srem: 1774 _mov(T_edx, Ctx->getConstantZero(Ty));
1775 _mov(T, Src0, Eax);
1776 _div(T_edx, Src1, T);
1777 _mov(Dest, T_edx);
1778 } break;
1779 case InstArithmetic::Srem: {
1822 // TODO(stichnot): Enable this after doing better performance and cross 1780 // TODO(stichnot): Enable this after doing better performance and cross
1823 // testing. 1781 // testing.
1824 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1782 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1825 // Optimize mod by constant power of 2, but not for Om1 or O0, just to 1783 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1826 // keep things simple there. 1784 // keep things simple there.
1827 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1785 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1828 int32_t Divisor = C->getValue(); 1786 int32_t Divisor = C->getValue();
1829 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1787 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1830 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1788 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1831 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1789 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1832 Type Ty = Dest->getType();
1833 // LLVM does the following for dest=src%(1<<log): 1790 // LLVM does the following for dest=src%(1<<log):
1834 // t=src 1791 // t=src
1835 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1792 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1836 // shr t,typewidth-log 1793 // shr t,typewidth-log
1837 // add t,src 1794 // add t,src
1838 // and t, -(1<<log) 1795 // and t, -(1<<log)
1839 // sub t,src 1796 // sub t,src
1840 // neg t 1797 // neg t
1841 // dest=t 1798 // dest=t
1842 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 1799 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
(...skipping 10 matching lines...)
1853 _add(T, Src0); 1810 _add(T, Src0);
1854 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1811 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1855 _sub(T, Src0); 1812 _sub(T, Src0);
1856 _neg(T); 1813 _neg(T);
1857 _mov(Dest, T); 1814 _mov(Dest, T);
1858 return; 1815 return;
1859 } 1816 }
1860 } 1817 }
1861 } 1818 }
1862 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1819 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1863 switch (Type Ty = Dest->getType()) { 1820 uint32_t Eax = Traits::RegisterSet::Reg_eax;
1821 uint32_t Edx = Traits::RegisterSet::Reg_edx;
1822 switch (Ty) {
1864 default: 1823 default:
1865 llvm_unreachable("Bad type for srem"); 1824 llvm_unreachable("Bad type for srem");
1866 // fallthrough 1825 // fallthrough
1867 case IceType_i32: 1826 case IceType_i32:
1868 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1869 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1870 _cbwdq(T_edx, T);
1871 _idiv(T_edx, Src1, T);
1872 _mov(Dest, T_edx);
1873 break; 1827 break;
1874 case IceType_i16: 1828 case IceType_i16:
1875 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1829 Eax = Traits::RegisterSet::Reg_ax;
1876 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1830 Edx = Traits::RegisterSet::Reg_dx;
1877 _cbwdq(T_edx, T);
1878 _idiv(T_edx, Src1, T);
1879 _mov(Dest, T_edx);
1880 break; 1831 break;
1881 case IceType_i8: 1832 case IceType_i8:
1882 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1833 Eax = Traits::RegisterSet::Reg_al;
1883 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). 1834 Edx = Traits::RegisterSet::Reg_ah;
1884 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah);
1885 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1886 _cbwdq(T_edx, T);
1887 _idiv(T_edx, Src1, T);
1888 static constexpr uint8_t AlSizeInBits = 8;
1889 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits));
1890 _mov(Dest, T_edx);
1891 break; 1835 break;
1892 } 1836 }
1893 break; 1837 T_edx = makeReg(Ty, Edx);
1838 _mov(T, Src0, Eax);
1839 _cbwdq(T_edx, T);
1840 _idiv(T_edx, Src1, T);
1841 _mov(Dest, T_edx);
1842 } break;
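
Similarly, the commented Srem sequence recovers src - (src / (1<<log)) * (1<<log); a minimal C++ model for 32-bit operands with log in 1..31, illustrative only and not part of this CL.

#include <cstdint>

static int32_t sremPow2(int32_t Src, uint32_t Log) {
  int32_t T = Src >> 31;                  // sar t, 31
  T = int32_t(uint32_t(T) >> (32 - Log)); // shr t, 32-log
  T += Src;                               // add t, src
  T &= int32_t(0xFFFFFFFFu << Log);       // and t, -(1<<log): quotient * (1<<log)
  T -= Src;                               // sub t, src
  return -T;                              // neg t: the remainder, with the sign of src
}
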
1894 case InstArithmetic::Fadd: 1843 case InstArithmetic::Fadd:
1895 _mov(T, Src0); 1844 _mov(T, Src0);
1896 _addss(T, Src1); 1845 _addss(T, Src1);
1897 _mov(Dest, T); 1846 _mov(Dest, T);
1898 break; 1847 break;
1899 case InstArithmetic::Fsub: 1848 case InstArithmetic::Fsub:
1900 _mov(T, Src0); 1849 _mov(T, Src0);
1901 _subss(T, Src1); 1850 _subss(T, Src1);
1902 _mov(Dest, T); 1851 _mov(Dest, T);
1903 break; 1852 break;
1904 case InstArithmetic::Fmul: 1853 case InstArithmetic::Fmul:
1905 _mov(T, Src0); 1854 _mov(T, Src0);
1906 _mulss(T, Src0 == Src1 ? T : Src1); 1855 _mulss(T, Src0 == Src1 ? T : Src1);
1907 _mov(Dest, T); 1856 _mov(Dest, T);
1908 break; 1857 break;
1909 case InstArithmetic::Fdiv: 1858 case InstArithmetic::Fdiv:
1910 _mov(T, Src0); 1859 _mov(T, Src0);
1911 _divss(T, Src1); 1860 _divss(T, Src1);
1912 _mov(Dest, T); 1861 _mov(Dest, T);
1913 break; 1862 break;
1914 case InstArithmetic::Frem: { 1863 case InstArithmetic::Frem: {
1915 constexpr SizeT MaxSrcs = 2; 1864 constexpr SizeT MaxSrcs = 2;
1916 Type Ty = Dest->getType();
1917 InstCall *Call = makeHelperCall( 1865 InstCall *Call = makeHelperCall(
1918 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 1866 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1919 Call->addArg(Src0); 1867 Call->addArg(Src0);
1920 Call->addArg(Src1); 1868 Call->addArg(Src1);
1921 return lowerCall(Call); 1869 return lowerCall(Call);
1922 } 1870 }
1923 } 1871 }
1924 } 1872 }
1925 1873
1926 template <class Machine> 1874 template <class Machine>
(...skipping 57 matching lines...)
1984 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1932 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1985 _cmp(Src0, Zero); 1933 _cmp(Src0, Zero);
1986 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1934 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1987 } 1935 }
1988 1936
1989 template <class Machine> 1937 template <class Machine>
1990 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1938 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
1991 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1939 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1992 InstCast::OpKind CastKind = Inst->getCastKind(); 1940 InstCast::OpKind CastKind = Inst->getCastKind();
1993 Variable *Dest = Inst->getDest(); 1941 Variable *Dest = Inst->getDest();
1942 Type DestTy = Dest->getType();
1994 switch (CastKind) { 1943 switch (CastKind) {
1995 default: 1944 default:
1996 Func->setError("Cast type not supported"); 1945 Func->setError("Cast type not supported");
1997 return; 1946 return;
1998 case InstCast::Sext: { 1947 case InstCast::Sext: {
1999 // Src0RM is the source operand legalized to physical register or memory, 1948 // Src0RM is the source operand legalized to physical register or memory,
2000 // but not immediate, since the relevant x86 native instructions don't 1949 // but not immediate, since the relevant x86 native instructions don't
2001 // allow an immediate operand. If the operand is an immediate, we could 1950 // allow an immediate operand. If the operand is an immediate, we could
2002 // consider computing the strength-reduced result at translation time, but 1951 // consider computing the strength-reduced result at translation time, but
2003 // we're unlikely to see something like that in the bitcode that the 1952 // we're unlikely to see something like that in the bitcode that the
2004 // optimizer wouldn't have already taken care of. 1953 // optimizer wouldn't have already taken care of.
2005 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1954 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2006 if (isVectorType(Dest->getType())) { 1955 if (isVectorType(DestTy)) {
2007 Type DestTy = Dest->getType();
2008 if (DestTy == IceType_v16i8) { 1956 if (DestTy == IceType_v16i8) {
2009 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 1957 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2010 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 1958 Variable *OneMask = makeVectorOfOnes(DestTy);
2011 Variable *T = makeReg(DestTy); 1959 Variable *T = makeReg(DestTy);
2012 _movp(T, Src0RM); 1960 _movp(T, Src0RM);
2013 _pand(T, OneMask); 1961 _pand(T, OneMask);
2014 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1962 Variable *Zeros = makeVectorOfZeros(DestTy);
2015 _pcmpgt(T, Zeros); 1963 _pcmpgt(T, Zeros);
2016 _movp(Dest, T); 1964 _movp(Dest, T);
2017 } else { 1965 } else {
2018 /// width = width(elty) - 1; dest = (src << width) >> width 1966 /// width = width(elty) - 1; dest = (src << width) >> width
2019 SizeT ShiftAmount = 1967 SizeT ShiftAmount =
2020 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1968 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2021 1; 1969 1;
2022 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 1970 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2023 Variable *T = makeReg(DestTy); 1971 Variable *T = makeReg(DestTy);
2024 _movp(T, Src0RM); 1972 _movp(T, Src0RM);
2025 _psll(T, ShiftConstant); 1973 _psll(T, ShiftConstant);
2026 _psra(T, ShiftConstant); 1974 _psra(T, ShiftConstant);
2027 _movp(Dest, T); 1975 _movp(Dest, T);
2028 } 1976 }
2029 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1977 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2030 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1978 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
2031 Constant *Shift = Ctx->getConstantInt32(31); 1979 Constant *Shift = Ctx->getConstantInt32(31);
2032 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1980 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2033 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1981 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2034 Variable *T_Lo = makeReg(DestLo->getType()); 1982 Variable *T_Lo = makeReg(DestLo->getType());
2035 if (Src0RM->getType() == IceType_i32) { 1983 if (Src0RM->getType() == IceType_i32) {
2036 _mov(T_Lo, Src0RM); 1984 _mov(T_Lo, Src0RM);
2037 } else if (Src0RM->getType() == IceType_i1) { 1985 } else if (Src0RM->getType() == IceType_i1) {
2038 _movzx(T_Lo, Src0RM); 1986 _movzx(T_Lo, Src0RM);
2039 _shl(T_Lo, Shift); 1987 _shl(T_Lo, Shift);
2040 _sar(T_Lo, Shift); 1988 _sar(T_Lo, Shift);
2041 } else { 1989 } else {
2042 _movsx(T_Lo, Src0RM); 1990 _movsx(T_Lo, Src0RM);
2043 } 1991 }
2044 _mov(DestLo, T_Lo); 1992 _mov(DestLo, T_Lo);
2045 Variable *T_Hi = nullptr; 1993 Variable *T_Hi = nullptr;
2046 _mov(T_Hi, T_Lo); 1994 _mov(T_Hi, T_Lo);
2047 if (Src0RM->getType() != IceType_i1) 1995 if (Src0RM->getType() != IceType_i1)
2048 // For i1, the sar instruction is already done above. 1996 // For i1, the sar instruction is already done above.
2049 _sar(T_Hi, Shift); 1997 _sar(T_Hi, Shift);
2050 _mov(DestHi, T_Hi); 1998 _mov(DestHi, T_Hi);
2051 } else if (Src0RM->getType() == IceType_i1) { 1999 } else if (Src0RM->getType() == IceType_i1) {
2052 // t1 = src 2000 // t1 = src
2053 // shl t1, dst_bitwidth - 1 2001 // shl t1, dst_bitwidth - 1
2054 // sar t1, dst_bitwidth - 1 2002 // sar t1, dst_bitwidth - 1
2055 // dst = t1 2003 // dst = t1
2056 size_t DestBits = 2004 size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy);
2057 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2058 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); 2005 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2059 Variable *T = makeReg(Dest->getType()); 2006 Variable *T = makeReg(DestTy);
2060 if (typeWidthInBytes(Dest->getType()) <= 2007 if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) {
2061 typeWidthInBytes(Src0RM->getType())) {
2062 _mov(T, Src0RM); 2008 _mov(T, Src0RM);
2063 } else { 2009 } else {
2064 // Widen the source using movsx or movzx. (It doesn't matter which one, 2010 // Widen the source using movsx or movzx. (It doesn't matter which one,
2065 // since the following shl/sar overwrite the bits.) 2011 // since the following shl/sar overwrite the bits.)
2066 _movzx(T, Src0RM); 2012 _movzx(T, Src0RM);
2067 } 2013 }
2068 _shl(T, ShiftAmount); 2014 _shl(T, ShiftAmount);
2069 _sar(T, ShiftAmount); 2015 _sar(T, ShiftAmount);
2070 _mov(Dest, T); 2016 _mov(Dest, T);
2071 } else { 2017 } else {
2072 // t1 = movsx src; dst = t1 2018 // t1 = movsx src; dst = t1
2073 Variable *T = makeReg(Dest->getType()); 2019 Variable *T = makeReg(DestTy);
2074 _movsx(T, Src0RM); 2020 _movsx(T, Src0RM);
2075 _mov(Dest, T); 2021 _mov(Dest, T);
2076 } 2022 }
2077 break; 2023 break;
2078 } 2024 }
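
For reference, here is a tiny C++ illustration of the shift-pair idiom used in the i1 and vector Sext cases above: shift the value into the top bit, then arithmetic-shift it back to smear the sign across the register (names are illustrative, not part of this CL).

#include <cstdint>

static int32_t sextI1ToI32(uint32_t Bit) { // Bit is 0 or 1
  int32_t T = int32_t(Bit << 31);          // shl t, 31
  return T >> 31;                          // sar t, 31: yields 0 or -1
}
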
2079 case InstCast::Zext: { 2025 case InstCast::Zext: {
2080 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2026 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2081 if (isVectorType(Dest->getType())) { 2027 if (isVectorType(DestTy)) {
2082 // onemask = materialize(1,1,...); dest = onemask & src 2028 // onemask = materialize(1,1,...); dest = onemask & src
2083 Type DestTy = Dest->getType();
2084 Variable *OneMask = makeVectorOfOnes(DestTy); 2029 Variable *OneMask = makeVectorOfOnes(DestTy);
2085 Variable *T = makeReg(DestTy); 2030 Variable *T = makeReg(DestTy);
2086 _movp(T, Src0RM); 2031 _movp(T, Src0RM);
2087 _pand(T, OneMask); 2032 _pand(T, OneMask);
2088 _movp(Dest, T); 2033 _movp(Dest, T);
2089 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2034 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2090 // t1=movzx src; dst.lo=t1; dst.hi=0 2035 // t1=movzx src; dst.lo=t1; dst.hi=0
2091 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2036 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2037 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2038 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2094 Variable *Tmp = makeReg(DestLo->getType()); 2039 Variable *Tmp = makeReg(DestLo->getType());
2095 if (Src0RM->getType() == IceType_i32) { 2040 if (Src0RM->getType() == IceType_i32) {
2096 _mov(Tmp, Src0RM); 2041 _mov(Tmp, Src0RM);
2097 } else { 2042 } else {
2098 _movzx(Tmp, Src0RM); 2043 _movzx(Tmp, Src0RM);
2099 } 2044 }
2100 _mov(DestLo, Tmp); 2045 _mov(DestLo, Tmp);
2101 _mov(DestHi, Zero); 2046 _mov(DestHi, Zero);
2102 } else if (Src0RM->getType() == IceType_i1) { 2047 } else if (Src0RM->getType() == IceType_i1) {
2103 // t = Src0RM; Dest = t 2048 // t = Src0RM; Dest = t
2104 Type DestTy = Dest->getType();
2105 Variable *T = nullptr; 2049 Variable *T = nullptr;
2106 if (DestTy == IceType_i8) { 2050 if (DestTy == IceType_i8) {
2107 _mov(T, Src0RM); 2051 _mov(T, Src0RM);
2108 } else { 2052 } else {
2109 assert(DestTy != IceType_i1); 2053 assert(DestTy != IceType_i1);
2110 assert(Traits::Is64Bit || DestTy != IceType_i64); 2054 assert(Traits::Is64Bit || DestTy != IceType_i64);
2111 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 2055 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2112 // In x86-64 we need to widen T to 64-bits to ensure that T -- if 2056 // In x86-64 we need to widen T to 64-bits to ensure that T -- if
2113 // written to the stack (i.e., in -Om1) will be fully zero-extended. 2057 // written to the stack (i.e., in -Om1) will be fully zero-extended.
2114 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32); 2058 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32);
2115 _movzx(T, Src0RM); 2059 _movzx(T, Src0RM);
2116 } 2060 }
2117 _mov(Dest, T); 2061 _mov(Dest, T);
2118 } else { 2062 } else {
2119 // t1 = movzx src; dst = t1 2063 // t1 = movzx src; dst = t1
2120 Variable *T = makeReg(Dest->getType()); 2064 Variable *T = makeReg(DestTy);
2121 _movzx(T, Src0RM); 2065 _movzx(T, Src0RM);
2122 _mov(Dest, T); 2066 _mov(Dest, T);
2123 } 2067 }
2124 break; 2068 break;
2125 } 2069 }
2126 case InstCast::Trunc: { 2070 case InstCast::Trunc: {
2127 if (isVectorType(Dest->getType())) { 2071 if (isVectorType(DestTy)) {
2128 // onemask = materialize(1,1,...); dst = src & onemask 2072 // onemask = materialize(1,1,...); dst = src & onemask
2129 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2073 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2130 Type Src0Ty = Src0RM->getType(); 2074 Type Src0Ty = Src0RM->getType();
2131 Variable *OneMask = makeVectorOfOnes(Src0Ty); 2075 Variable *OneMask = makeVectorOfOnes(Src0Ty);
2132 Variable *T = makeReg(Dest->getType()); 2076 Variable *T = makeReg(DestTy);
2133 _movp(T, Src0RM); 2077 _movp(T, Src0RM);
2134 _pand(T, OneMask); 2078 _pand(T, OneMask);
2135 _movp(Dest, T); 2079 _movp(Dest, T);
2080 } else if (DestTy == IceType_i1 || DestTy == IceType_i8) {
2081 // Make sure we truncate from and into valid registers.
2082 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2083 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
2084 Src0 = loOperand(Src0);
2085 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2086 Variable *T = copyToReg8(Src0RM);
2087 if (DestTy == IceType_i1)
2088 _and(T, Ctx->getConstantInt1(1));
2089 _mov(Dest, T);
2136 } else { 2090 } else {
2137 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2091 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) 2092 if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
2139 Src0 = loOperand(Src0); 2093 Src0 = loOperand(Src0);
2140 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2094 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2141 // t1 = trunc Src0RM; Dest = t1 2095 // t1 = trunc Src0RM; Dest = t1
2142 Variable *T = nullptr; 2096 Variable *T = makeReg(DestTy);
2143 _mov(T, Src0RM); 2097 _mov(T, Src0RM);
2144 if (Dest->getType() == IceType_i1)
2145 _and(T, Ctx->getConstantInt1(1));
2146 _mov(Dest, T); 2098 _mov(Dest, T);
2147 } 2099 }
2148 break; 2100 break;
2149 } 2101 }
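
For reference, the new i1/i8 Trunc path above appears to amount to copying into a register that has an 8-bit form and masking the low bit; a trivial C++ sketch, illustrative only and not part of this CL.

#include <cstdint>

static uint8_t truncI32ToI1(uint32_t Src) {
  uint8_t T = uint8_t(Src); // copyToReg8: move into an 8-bit-addressable register
  return T & 1;             // _and(T, 1): an i1 keeps only the low bit
}
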
2150 case InstCast::Fptrunc: 2102 case InstCast::Fptrunc:
2151 case InstCast::Fpext: { 2103 case InstCast::Fpext: {
2152 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2104 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2153 // t1 = cvt Src0RM; Dest = t1 2105 // t1 = cvt Src0RM; Dest = t1
2154 Variable *T = makeReg(Dest->getType()); 2106 Variable *T = makeReg(DestTy);
2155 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); 2107 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
2156 _mov(Dest, T); 2108 _mov(Dest, T);
2157 break; 2109 break;
2158 } 2110 }
2159 case InstCast::Fptosi: 2111 case InstCast::Fptosi:
2160 if (isVectorType(Dest->getType())) { 2112 if (isVectorType(DestTy)) {
2161 assert(Dest->getType() == IceType_v4i32 && 2113 assert(DestTy == IceType_v4i32 &&
2162 Inst->getSrc(0)->getType() == IceType_v4f32); 2114 Inst->getSrc(0)->getType() == IceType_v4f32);
2163 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2115 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2164 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2116 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2165 Src0RM = legalizeToReg(Src0RM); 2117 Src0RM = legalizeToReg(Src0RM);
2166 Variable *T = makeReg(Dest->getType()); 2118 Variable *T = makeReg(DestTy);
2167 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2119 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2168 _movp(Dest, T); 2120 _movp(Dest, T);
2169 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2121 } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
2170 constexpr SizeT MaxSrcs = 1; 2122 constexpr SizeT MaxSrcs = 1;
2171 Type SrcType = Inst->getSrc(0)->getType(); 2123 Type SrcType = Inst->getSrc(0)->getType();
2172 InstCall *Call = 2124 InstCall *Call =
2173 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2125 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2174 : H_fptosi_f64_i64, 2126 : H_fptosi_f64_i64,
2175 Dest, MaxSrcs); 2127 Dest, MaxSrcs);
2176 Call->addArg(Inst->getSrc(0)); 2128 Call->addArg(Inst->getSrc(0));
2177 lowerCall(Call); 2129 lowerCall(Call);
2178 } else { 2130 } else {
2179 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2131 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2180 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2132 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2181 Variable *T_1 = nullptr; 2133 Variable *T_1 = nullptr;
2182 if (Traits::Is64Bit && Dest->getType() == IceType_i64) { 2134 if (Traits::Is64Bit && DestTy == IceType_i64) {
2183 T_1 = makeReg(IceType_i64); 2135 T_1 = makeReg(IceType_i64);
2184 } else { 2136 } else {
2185 assert(Dest->getType() != IceType_i64); 2137 assert(DestTy != IceType_i64);
2186 T_1 = makeReg(IceType_i32); 2138 T_1 = makeReg(IceType_i32);
2187 } 2139 }
2188 // cvt() requires its integer argument to be a GPR. 2140 // cvt() requires its integer argument to be a GPR.
2189 Variable *T_2 = makeReg(Dest->getType()); 2141 Variable *T_2 = makeReg(DestTy);
2142 if (isByteSizedType(DestTy)) {
2143 assert(T_1->getType() == IceType_i32);
2144 T_1->setRegClass(RCX86_Is32To8);
2145 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2146 }
2190 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2147 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2191 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2148 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2192 if (Dest->getType() == IceType_i1) 2149 if (DestTy == IceType_i1)
2193 _and(T_2, Ctx->getConstantInt1(1)); 2150 _and(T_2, Ctx->getConstantInt1(1));
2194 _mov(Dest, T_2); 2151 _mov(Dest, T_2);
2195 } 2152 }
2196 break; 2153 break;
2197 case InstCast::Fptoui: 2154 case InstCast::Fptoui:
2198 if (isVectorType(Dest->getType())) { 2155 if (isVectorType(DestTy)) {
2199 assert(Dest->getType() == IceType_v4i32 && 2156 assert(DestTy == IceType_v4i32 &&
2200 Inst->getSrc(0)->getType() == IceType_v4f32); 2157 Inst->getSrc(0)->getType() == IceType_v4f32);
2201 constexpr SizeT MaxSrcs = 1; 2158 constexpr SizeT MaxSrcs = 1;
2202 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); 2159 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
2203 Call->addArg(Inst->getSrc(0)); 2160 Call->addArg(Inst->getSrc(0));
2204 lowerCall(Call); 2161 lowerCall(Call);
2205 } else if (Dest->getType() == IceType_i64 || 2162 } else if (DestTy == IceType_i64 ||
2206 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { 2163 (!Traits::Is64Bit && DestTy == IceType_i32)) {
2207 // Use a helper for both x86-32 and x86-64. 2164 // Use a helper for both x86-32 and x86-64.
2208 constexpr SizeT MaxSrcs = 1; 2165 constexpr SizeT MaxSrcs = 1;
2209 Type DestType = Dest->getType();
2210 Type SrcType = Inst->getSrc(0)->getType(); 2166 Type SrcType = Inst->getSrc(0)->getType();
2211 IceString TargetString; 2167 IceString TargetString;
2212 if (Traits::Is64Bit) { 2168 if (Traits::Is64Bit) {
2213 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2169 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2214 : H_fptoui_f64_i64; 2170 : H_fptoui_f64_i64;
2215 } else if (isInt32Asserting32Or64(DestType)) { 2171 } else if (isInt32Asserting32Or64(DestTy)) {
2216 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 2172 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
2217 : H_fptoui_f64_i32; 2173 : H_fptoui_f64_i32;
2218 } else { 2174 } else {
2219 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 2175 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
2220 : H_fptoui_f64_i64; 2176 : H_fptoui_f64_i64;
2221 } 2177 }
2222 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2178 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2223 Call->addArg(Inst->getSrc(0)); 2179 Call->addArg(Inst->getSrc(0));
2224 lowerCall(Call); 2180 lowerCall(Call);
2225 return; 2181 return;
2226 } else { 2182 } else {
2227 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2183 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2228 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2184 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2229 assert(Dest->getType() != IceType_i64); 2185 assert(DestTy != IceType_i64);
2230 Variable *T_1 = nullptr; 2186 Variable *T_1 = nullptr;
2231 if (Traits::Is64Bit && Dest->getType() == IceType_i32) { 2187 if (Traits::Is64Bit && DestTy == IceType_i32) {
2232 T_1 = makeReg(IceType_i64); 2188 T_1 = makeReg(IceType_i64);
2233 } else { 2189 } else {
2234 assert(Dest->getType() != IceType_i32); 2190 assert(DestTy != IceType_i32);
2235 T_1 = makeReg(IceType_i32); 2191 T_1 = makeReg(IceType_i32);
2236 } 2192 }
2237 Variable *T_2 = makeReg(Dest->getType()); 2193 Variable *T_2 = makeReg(DestTy);
2194 if (isByteSizedType(DestTy)) {
2195 assert(T_1->getType() == IceType_i32);
2196 T_1->setRegClass(RCX86_Is32To8);
2197 T_2->setRegClass(RCX86_IsTrunc8Rcvr);
2198 }
2238 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); 2199 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2239 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2200 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2240 if (Dest->getType() == IceType_i1) 2201 if (DestTy == IceType_i1)
2241 _and(T_2, Ctx->getConstantInt1(1)); 2202 _and(T_2, Ctx->getConstantInt1(1));
2242 _mov(Dest, T_2); 2203 _mov(Dest, T_2);
2243 } 2204 }
2244 break; 2205 break;
2245 case InstCast::Sitofp: 2206 case InstCast::Sitofp:
2246 if (isVectorType(Dest->getType())) { 2207 if (isVectorType(DestTy)) {
2247 assert(Dest->getType() == IceType_v4f32 && 2208 assert(DestTy == IceType_v4f32 &&
2248 Inst->getSrc(0)->getType() == IceType_v4i32); 2209 Inst->getSrc(0)->getType() == IceType_v4i32);
2249 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2210 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2250 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2211 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2251 Src0RM = legalizeToReg(Src0RM); 2212 Src0RM = legalizeToReg(Src0RM);
2252 Variable *T = makeReg(Dest->getType()); 2213 Variable *T = makeReg(DestTy);
2253 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); 2214 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2254 _movp(Dest, T); 2215 _movp(Dest, T);
2255 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { 2216 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
2256 // Use a helper for x86-32. 2217 // Use a helper for x86-32.
2257 constexpr SizeT MaxSrcs = 1; 2218 constexpr SizeT MaxSrcs = 1;
2258 Type DestType = Dest->getType();
2259 InstCall *Call = 2219 InstCall *Call =
2260 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2220 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32
2261 : H_sitofp_i64_f64, 2221 : H_sitofp_i64_f64,
2262 Dest, MaxSrcs); 2222 Dest, MaxSrcs);
2263 // TODO: Call the correct compiler-rt helper function. 2223 // TODO: Call the correct compiler-rt helper function.
2264 Call->addArg(Inst->getSrc(0)); 2224 Call->addArg(Inst->getSrc(0));
2265 lowerCall(Call); 2225 lowerCall(Call);
2266 return; 2226 return;
2267 } else { 2227 } else {
2268 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2228 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2269 // Sign-extend the operand. 2229 // Sign-extend the operand.
2270 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2230 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2271 Variable *T_1 = nullptr; 2231 Variable *T_1 = nullptr;
2272 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { 2232 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
2273 T_1 = makeReg(IceType_i64); 2233 T_1 = makeReg(IceType_i64);
2274 } else { 2234 } else {
2275 assert(Src0RM->getType() != IceType_i64); 2235 assert(Src0RM->getType() != IceType_i64);
2276 T_1 = makeReg(IceType_i32); 2236 T_1 = makeReg(IceType_i32);
2277 } 2237 }
2278 Variable *T_2 = makeReg(Dest->getType()); 2238 Variable *T_2 = makeReg(DestTy);
2279 if (Src0RM->getType() == T_1->getType()) 2239 if (Src0RM->getType() == T_1->getType())
2280 _mov(T_1, Src0RM); 2240 _mov(T_1, Src0RM);
2281 else 2241 else
2282 _movsx(T_1, Src0RM); 2242 _movsx(T_1, Src0RM);
2283 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2243 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2284 _mov(Dest, T_2); 2244 _mov(Dest, T_2);
2285 } 2245 }
2286 break; 2246 break;
2287 case InstCast::Uitofp: { 2247 case InstCast::Uitofp: {
2288 Operand *Src0 = Inst->getSrc(0); 2248 Operand *Src0 = Inst->getSrc(0);
2289 if (isVectorType(Src0->getType())) { 2249 if (isVectorType(Src0->getType())) {
2290 assert(Dest->getType() == IceType_v4f32 && 2250 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32);
2291 Src0->getType() == IceType_v4i32);
2292 constexpr SizeT MaxSrcs = 1; 2251 constexpr SizeT MaxSrcs = 1;
2293 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2252 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2294 Call->addArg(Src0); 2253 Call->addArg(Src0);
2295 lowerCall(Call); 2254 lowerCall(Call);
2296 } else if (Src0->getType() == IceType_i64 || 2255 } else if (Src0->getType() == IceType_i64 ||
2297 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2256 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2298 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on 2257 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2299 // x86-32. 2258 // x86-32.
2300 constexpr SizeT MaxSrcs = 1; 2259 constexpr SizeT MaxSrcs = 1;
2301 Type DestType = Dest->getType();
2302 IceString TargetString; 2260 IceString TargetString;
2303 if (isInt32Asserting32Or64(Src0->getType())) { 2261 if (isInt32Asserting32Or64(Src0->getType())) {
2304 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2262 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32
2305 : H_uitofp_i32_f64; 2263 : H_uitofp_i32_f64;
2306 } else { 2264 } else {
2307 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2265 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32
2308 : H_uitofp_i64_f64; 2266 : H_uitofp_i64_f64;
2309 } 2267 }
2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2268 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2311 Call->addArg(Src0); 2269 Call->addArg(Src0);
2312 lowerCall(Call); 2270 lowerCall(Call);
2313 return; 2271 return;
2314 } else { 2272 } else {
2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2273 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2316 // Zero-extend the operand. 2274 // Zero-extend the operand.
2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2275 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2318 Variable *T_1 = nullptr; 2276 Variable *T_1 = nullptr;
2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { 2277 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
2320 T_1 = makeReg(IceType_i64); 2278 T_1 = makeReg(IceType_i64);
2321 } else { 2279 } else {
2322 assert(Src0RM->getType() != IceType_i64); 2280 assert(Src0RM->getType() != IceType_i64);
2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); 2281 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
2324 T_1 = makeReg(IceType_i32); 2282 T_1 = makeReg(IceType_i32);
2325 } 2283 }
2326 Variable *T_2 = makeReg(Dest->getType()); 2284 Variable *T_2 = makeReg(DestTy);
2327 if (Src0RM->getType() == T_1->getType()) 2285 if (Src0RM->getType() == T_1->getType())
2328 _mov(T_1, Src0RM); 2286 _mov(T_1, Src0RM);
2329 else 2287 else
2330 _movzx(T_1, Src0RM); 2288 _movzx(T_1, Src0RM);
2331 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); 2289 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2332 _mov(Dest, T_2); 2290 _mov(Dest, T_2);
2333 } 2291 }
2334 break; 2292 break;
2335 } 2293 }
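The non-helper scalar path above relies on a simple fact: zero-extending the unsigned source into a wider type makes it non-negative as a signed integer, so the signed cvtsi2ss/cvtsi2sd conversion yields the exact unsigned result. A minimal standalone sketch of that idea in plain C++, not Subzero lowering code (names are illustrative):

    #include <cstdint>

    // Sketch of "zero-extend, then signed convert" for uitofp on a 64-bit
    // target: a zero-extended uint32_t is always non-negative as an int64_t,
    // so the signed conversion (cvtsi2ss on x86-64) gives the unsigned result.
    float UitofpViaSignedConvert(uint32_t U) {
      int64_t Widened = static_cast<int64_t>(U); // zero-extension; Widened >= 0
      return static_cast<float>(Widened);        // signed int -> float convert
    }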
2336 case InstCast::Bitcast: { 2294 case InstCast::Bitcast: {
2337 Operand *Src0 = Inst->getSrc(0); 2295 Operand *Src0 = Inst->getSrc(0);
2338 if (Dest->getType() == Src0->getType()) { 2296 if (DestTy == Src0->getType()) {
2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2297 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2340 lowerAssign(Assign); 2298 lowerAssign(Assign);
2341 return; 2299 return;
2342 } 2300 }
2343 switch (Dest->getType()) { 2301 switch (DestTy) {
2344 default: 2302 default:
2345 llvm_unreachable("Unexpected Bitcast dest type"); 2303 llvm_unreachable("Unexpected Bitcast dest type");
2346 case IceType_i8: { 2304 case IceType_i8: {
2347 assert(Src0->getType() == IceType_v8i1); 2305 assert(Src0->getType() == IceType_v8i1);
2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); 2306 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
2349 Call->addArg(Src0); 2307 Call->addArg(Src0);
2350 lowerCall(Call); 2308 lowerCall(Call);
2351 } break; 2309 } break;
2352 case IceType_i16: { 2310 case IceType_i16: {
2353 assert(Src0->getType() == IceType_v16i1); 2311 assert(Src0->getType() == IceType_v16i1);
2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); 2312 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
2355 Call->addArg(Src0); 2313 Call->addArg(Src0);
2356 lowerCall(Call); 2314 lowerCall(Call);
2357 } break; 2315 } break;
2358 case IceType_i32: 2316 case IceType_i32:
2359 case IceType_f32: { 2317 case IceType_f32: {
2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2318 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2361 Type DestType = Dest->getType();
2362 Type SrcType = Src0RM->getType(); 2319 Type SrcType = Src0RM->getType();
2363 (void)DestType; 2320 assert((DestTy == IceType_i32 && SrcType == IceType_f32) ||
2364 assert((DestType == IceType_i32 && SrcType == IceType_f32) || 2321 (DestTy == IceType_f32 && SrcType == IceType_i32));
2365 (DestType == IceType_f32 && SrcType == IceType_i32));
2366 // a.i32 = bitcast b.f32 ==> 2322 // a.i32 = bitcast b.f32 ==>
2367 // t.f32 = b.f32 2323 // t.f32 = b.f32
2368 // s.f32 = spill t.f32 2324 // s.f32 = spill t.f32
2369 // a.i32 = s.f32 2325 // a.i32 = s.f32
2370 Variable *T = nullptr; 2326 Variable *T = nullptr;
2371 // TODO: Should be able to force a spill setup by calling legalize() with 2327 // TODO: Should be able to force a spill setup by calling legalize() with
2372 // Legal_Mem and not Legal_Reg or Legal_Imm. 2328 // Legal_Mem and not Legal_Reg or Legal_Imm.
2373 typename Traits::SpillVariable *SpillVar = 2329 typename Traits::SpillVariable *SpillVar =
2374 Func->makeVariable<typename Traits::SpillVariable>(SrcType); 2330 Func->makeVariable<typename Traits::SpillVariable>(SrcType);
2375 SpillVar->setLinkedTo(Dest); 2331 SpillVar->setLinkedTo(Dest);
(...skipping 53 matching lines...)
2429 if (Traits::Is64Bit) { 2385 if (Traits::Is64Bit) {
2430 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2386 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2431 Variable *T = makeReg(IceType_f64); 2387 Variable *T = makeReg(IceType_f64);
2432 // Movd requires its fp argument (in this case, the bitcast 2388 // Movd requires its fp argument (in this case, the bitcast
2433 // destination) to be an xmm register. 2389 // destination) to be an xmm register.
2434 _movd(T, Src0RM); 2390 _movd(T, Src0RM);
2435 _mov(Dest, T); 2391 _mov(Dest, T);
2436 } else { 2392 } else {
2437 Src0 = legalize(Src0); 2393 Src0 = legalize(Src0);
2438 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { 2394 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2439 Variable *T = Func->makeVariable(Dest->getType()); 2395 Variable *T = Func->makeVariable(DestTy);
2440 _movq(T, Src0); 2396 _movq(T, Src0);
2441 _movq(Dest, T); 2397 _movq(Dest, T);
2442 break; 2398 break;
2443 } 2399 }
2444 // a.f64 = bitcast b.i64 ==> 2400 // a.f64 = bitcast b.i64 ==>
2445 // t_lo.i32 = b_lo.i32 2401 // t_lo.i32 = b_lo.i32
2446 // FakeDef(s.f64) 2402 // FakeDef(s.f64)
2447 // lo(s.f64) = t_lo.i32 2403 // lo(s.f64) = t_lo.i32
2448 // t_hi.i32 = b_hi.i32 2404 // t_hi.i32 = b_hi.i32
2449 // hi(s.f64) = t_hi.i32 2405 // hi(s.f64) = t_hi.i32
(...skipping 580 matching lines...)
3030 2986
3031 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2987 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
3032 InstructionSet >= Traits::SSE4_1) { 2988 InstructionSet >= Traits::SSE4_1) {
3033 // Use insertps, pinsrb, pinsrw, or pinsrd. 2989 // Use insertps, pinsrb, pinsrw, or pinsrd.
3034 Operand *ElementRM = 2990 Operand *ElementRM =
3035 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2991 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
3036 Operand *SourceVectRM = 2992 Operand *SourceVectRM =
3037 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2993 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3038 Variable *T = makeReg(Ty); 2994 Variable *T = makeReg(Ty);
3039 _movp(T, SourceVectRM); 2995 _movp(T, SourceVectRM);
3040 if (Ty == IceType_v4f32) 2996 if (Ty == IceType_v4f32) {
3041 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); 2997 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
3042 else 2998 } else {
3043 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source 2999 // For the pinsrb and pinsrw instructions, when the source operand is a
3044 // operand is a register, it must be a full r32 register like eax, and not 3000 // register, it must be a full r32 register like eax, and not ax/al/ah.
3045 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates 3001 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use
3046 // for the use of r16 and r8 by converting them through getBaseReg(), 3002 // of r16 and r8 by converting them through getBaseReg(), while emitIAS()
3047 // while emitIAS() validates that the original and base register encodings 3003 // validates that the original and base register encodings are the same.
3048 // are the same. But for an "interior" register like ah, it should 3004 if (ElementRM->getType() == IceType_i8 &&
3049 // probably be copied into an r32 via movzx so that the types work out. 3005 llvm::isa<Variable>(ElementRM)) {
3006 // Don't use ah/bh/ch/dh for pinsrb.
3007 ElementRM = copyToReg8(ElementRM);
3008 }
3050 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); 3009 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
3010 }
3051 _movp(Inst->getDest(), T); 3011 _movp(Inst->getDest(), T);
3052 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 3012 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
3053 // Use shufps or movss. 3013 // Use shufps or movss.
3054 Variable *ElementR = nullptr; 3014 Variable *ElementR = nullptr;
3055 Operand *SourceVectRM = 3015 Operand *SourceVectRM =
3056 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 3016 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
3057 3017
3058 if (InVectorElementTy == IceType_f32) { 3018 if (InVectorElementTy == IceType_f32) {
3059 // ElementR will be in an XMM register since it is floating point. 3019 // ElementR will be in an XMM register since it is floating point.
3060 ElementR = legalizeToReg(ElementToInsertNotLegalized); 3020 ElementR = legalizeToReg(ElementToInsertNotLegalized);
(...skipping 2286 matching lines...)
5347 // TODO(wala,stichnot): lea should not 5307 // TODO(wala,stichnot): lea should not
5348 // be required. The address of the stack slot is known at compile time 5308 // be required. The address of the stack slot is known at compile time
5349 // (although not until after addProlog()). 5309 // (although not until after addProlog()).
5350 constexpr Type PointerType = IceType_i32; 5310 constexpr Type PointerType = IceType_i32;
5351 Variable *Loc = makeReg(PointerType); 5311 Variable *Loc = makeReg(PointerType);
5352 _lea(Loc, Slot); 5312 _lea(Loc, Slot);
5353 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5313 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5354 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 5314 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5355 } 5315 }
5356 5316
5317 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR.
5318 /// Src is assumed to already be legalized. If the source operand is known to
5319 /// be a memory or immediate operand, a simple mov will suffice. But if the
5320 /// source operand can be a physical register, then it must first be copied into
5321 /// a physical register that is truncatable to 8 bits, then truncated into a
5322 /// physical register that can receive a truncation, and finally copied into the
5323 /// result 8-bit register (which in general can be any 8-bit register). For
5324 /// example, moving %ebp into %ah may be accomplished as:
5325 /// movl %ebp, %edx
5326 /// mov_trunc %edx, %dl // this redundant assignment is ultimately elided
5327 /// movb %dl, %ah
5328 /// On the other hand, moving a memory or immediate operand into ah:
5329 /// movb 4(%ebp), %ah
5330 /// movb $my_imm, %ah
5331 ///
5332 /// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not
5333 /// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead,
5334 /// use RegNum=NoRegister and then let the caller do a separate copy into
5335 /// Reg_ah.
5336 ///
5337 /// Note #2. ConstantRelocatable operands are also put through this process
5338 /// (not truncated directly) because our ELF emitter does R_386_32 relocations
5339 /// but not R_386_8 relocations.
5340 ///
5341 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8
5342 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper
5343 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument
5344 /// to the pinsrb instruction.
5345 template <class Machine>
5346 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) {
5347 Type Ty = Src->getType();
5348 assert(isScalarIntegerType(Ty));
5349 assert(Ty != IceType_i1);
5350 Variable *Reg = makeReg(IceType_i8, RegNum);
5351 Reg->setRegClass(RCX86_IsTrunc8Rcvr);
5352 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) {
5353 Variable *SrcTruncable = makeReg(Ty);
5354 switch (Ty) {
5355 case IceType_i64:
5356 SrcTruncable->setRegClass(RCX86_Is64To8);
5357 break;
5358 case IceType_i32:
5359 SrcTruncable->setRegClass(RCX86_Is32To8);
5360 break;
5361 case IceType_i16:
5362 SrcTruncable->setRegClass(RCX86_Is16To8);
5363 break;
5364 default:
5365 // i8 - just use default register class
5366 break;
5367 }
5368 Variable *SrcRcvr = makeReg(IceType_i8);
5369 SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr);
5370 _mov(SrcTruncable, Src);
5371 _mov(SrcRcvr, SrcTruncable);
5372 Src = SrcRcvr;
5373 }
5374 _mov(Reg, Src);
5375 return Reg;
5376 }
5377
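As background for the extra copies above: on x86-32 only eax, ecx, edx, and ebx have addressable low-8-bit aliases, so a value sitting in %esp, %ebp, %esi, or %edi cannot be truncated to 8 bits in place. A standalone sketch of that constraint (illustrative only, not Subzero code):

    #include <cstring>

    // Illustrative check: which 32-bit GPRs have a low-8-bit alias
    // (al/cl/dl/bl) on x86-32. esp/ebp/esi/edi do not, which is what forces
    // copyToReg8() to route such values through a truncatable register first.
    bool HasLow8BitAlias(const char *Gpr32) {
      return std::strcmp(Gpr32, "eax") == 0 || std::strcmp(Gpr32, "ecx") == 0 ||
             std::strcmp(Gpr32, "edx") == 0 || std::strcmp(Gpr32, "ebx") == 0;
    }

Expressing the constraint through register classes (RCX86_Is32To8, RCX86_IsTrunc8Rcvr, and so on) lets the register allocator choose any legal register rather than hard-coding one of the four byte-addressable GPRs.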
5357 /// Helper for legalize() to emit the right code to lower an operand to a 5378 /// Helper for legalize() to emit the right code to lower an operand to a
5358 /// register of the appropriate type. 5379 /// register of the appropriate type.
5359 template <class Machine> 5380 template <class Machine>
5360 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5381 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5361 Type Ty = Src->getType(); 5382 Type Ty = Src->getType();
5362 Variable *Reg = makeReg(Ty, RegNum); 5383 Variable *Reg = makeReg(Ty, RegNum);
5363 if (isVectorType(Ty)) { 5384 if (isVectorType(Ty)) {
5364 _movp(Reg, Src); 5385 _movp(Reg, Src);
5365 } else { 5386 } else {
5366 _mov(Reg, Src); 5387 _mov(Reg, Src);
(...skipping 493 matching lines...)
5860 } 5881 }
5861 // the offset is not eligible for blinding or pooling, return the original 5882 // the offset is not eligible for blinding or pooling, return the original
5862 // mem operand 5883 // mem operand
5863 return MemOperand; 5884 return MemOperand;
5864 } 5885 }
5865 5886
5866 } // end of namespace X86Internal 5887 } // end of namespace X86Internal
5867 } // end of namespace Ice 5888 } // end of namespace Ice
5868 5889
5869 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5890 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H