| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1198 matching lines...) | (...skipping 1198 matching lines...) |
| 1209 } | 1209 } |
| 1210 } else { | 1210 } else { |
| 1211 // NON-CONSTANT CASES. | 1211 // NON-CONSTANT CASES. |
| 1212 Constant *BitTest = Ctx->getConstantInt32(0x20); | 1212 Constant *BitTest = Ctx->getConstantInt32(0x20); |
| 1213 typename Traits::Insts::Label *Label = | 1213 typename Traits::Insts::Label *Label = |
| 1214 Traits::Insts::Label::create(Func, this); | 1214 Traits::Insts::Label::create(Func, this); |
| 1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> | 1215 // COMMON PREFIX OF: a=b SHIFT_OP c ==> |
| 1216 // t1:ecx = c.lo & 0xff | 1216 // t1:ecx = c.lo & 0xff |
| 1217 // t2 = b.lo | 1217 // t2 = b.lo |
| 1218 // t3 = b.hi | 1218 // t3 = b.hi |
| 1219 T_1 = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 1219 T_1 = copyToReg8(Src1Lo, Traits::RegisterSet::Reg_cl); |
| 1220 _mov(T_1, Src1Lo); | |
| 1221 _mov(T_2, Src0Lo); | 1220 _mov(T_2, Src0Lo); |
| 1222 _mov(T_3, Src0Hi); | 1221 _mov(T_3, Src0Hi); |
| 1223 switch (Op) { | 1222 switch (Op) { |
| 1224 default: | 1223 default: |
| 1225 assert(0 && "non-shift op"); | 1224 assert(0 && "non-shift op"); |
| 1226 break; | 1225 break; |
| 1227 case InstArithmetic::Shl: { | 1226 case InstArithmetic::Shl: { |
| 1228 // a=b<<c ==> | 1227 // a=b<<c ==> |
| 1229 // t3 = shld t3, t2, t1 | 1228 // t3 = shld t3, t2, t1 |
| 1230 // t2 = shl t2, t1 | 1229 // t2 = shl t2, t1 |
| (...skipping 57 matching lines...) | (...skipping 57 matching lines...) |
| 1288 // a.hi = t3 | 1287 // a.hi = t3 |
| 1289 Context.insert(Label); | 1288 Context.insert(Label); |
| 1290 _mov(DestLo, T_2); | 1289 _mov(DestLo, T_2); |
| 1291 _mov(DestHi, T_3); | 1290 _mov(DestHi, T_3); |
| 1292 } | 1291 } |
| 1293 } | 1292 } |
| 1294 | 1293 |
| 1295 template <class Machine> | 1294 template <class Machine> |
| 1296 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { | 1295 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| 1297 Variable *Dest = Inst->getDest(); | 1296 Variable *Dest = Inst->getDest(); |
| 1297 Type Ty = Dest->getType(); |
| 1298 Operand *Src0 = legalize(Inst->getSrc(0)); | 1298 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1299 Operand *Src1 = legalize(Inst->getSrc(1)); | 1299 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1300 if (Inst->isCommutative()) { | 1300 if (Inst->isCommutative()) { |
| 1301 uint32_t SwapCount = 0; | 1301 uint32_t SwapCount = 0; |
| 1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { | 1302 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) { |
| 1303 std::swap(Src0, Src1); | 1303 std::swap(Src0, Src1); |
| 1304 ++SwapCount; | 1304 ++SwapCount; |
| 1305 } | 1305 } |
| 1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) { | 1306 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) { |
| 1307 std::swap(Src0, Src1); | 1307 std::swap(Src0, Src1); |
| 1308 ++SwapCount; | 1308 ++SwapCount; |
| 1309 } | 1309 } |
| 1310 // Improve two-address code patterns by avoiding a copy to the dest | 1310 // Improve two-address code patterns by avoiding a copy to the dest |
| 1311 // register when one of the source operands ends its lifetime here. | 1311 // register when one of the source operands ends its lifetime here. |
| 1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) { | 1312 if (!Inst->isLastUse(Src0) && Inst->isLastUse(Src1)) { |
| 1313 std::swap(Src0, Src1); | 1313 std::swap(Src0, Src1); |
| 1314 ++SwapCount; | 1314 ++SwapCount; |
| 1315 } | 1315 } |
| 1316 assert(SwapCount <= 1); | 1316 assert(SwapCount <= 1); |
| 1317 (void)SwapCount; | 1317 (void)SwapCount; |
| 1318 } | 1318 } |
| 1319 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1319 if (!Traits::Is64Bit && Ty == IceType_i64) { |
| 1320 // These x86-32 helper-call-involved instructions are lowered in this | 1320 // These x86-32 helper-call-involved instructions are lowered in this |
| 1321 // separate switch. This is because loOperand() and hiOperand() may insert | 1321 // separate switch. This is because loOperand() and hiOperand() may insert |
| 1322 // redundant instructions for constant blinding and pooling. Such redundant | 1322 // redundant instructions for constant blinding and pooling. Such redundant |
| 1323 // instructions will fail liveness analysis under -Om1 setting. And, | 1323 // instructions will fail liveness analysis under -Om1 setting. And, |
| 1324 // actually these arguments do not need to be processed with loOperand() | 1324 // actually these arguments do not need to be processed with loOperand() |
| 1325 // and hiOperand() to be used. | 1325 // and hiOperand() to be used. |
| 1326 switch (Inst->getOp()) { | 1326 switch (Inst->getOp()) { |
| 1327 case InstArithmetic::Udiv: { | 1327 case InstArithmetic::Udiv: { |
| 1328 constexpr SizeT MaxSrcs = 2; | 1328 constexpr SizeT MaxSrcs = 2; |
| 1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1329 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
| (...skipping 126 matching lines...) | (...skipping 126 matching lines...) |
| 1456 case InstArithmetic::Udiv: | 1456 case InstArithmetic::Udiv: |
| 1457 case InstArithmetic::Sdiv: | 1457 case InstArithmetic::Sdiv: |
| 1458 case InstArithmetic::Urem: | 1458 case InstArithmetic::Urem: |
| 1459 case InstArithmetic::Srem: | 1459 case InstArithmetic::Srem: |
| 1460 llvm_unreachable("Call-helper-involved instruction for i64 type \ | 1460 llvm_unreachable("Call-helper-involved instruction for i64 type \ |
| 1461 should have already been handled before"); | 1461 should have already been handled before"); |
| 1462 break; | 1462 break; |
| 1463 } | 1463 } |
| 1464 return; | 1464 return; |
| 1465 } | 1465 } |
| 1466 if (isVectorType(Dest->getType())) { | 1466 if (isVectorType(Ty)) { |
| 1467 // TODO: Trap on integer divide and integer modulo by zero. See: | 1467 // TODO: Trap on integer divide and integer modulo by zero. See: |
| 1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1468 // https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| 1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) | 1469 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) |
| 1470 Src1 = legalizeToReg(Src1); | 1470 Src1 = legalizeToReg(Src1); |
| 1471 switch (Inst->getOp()) { | 1471 switch (Inst->getOp()) { |
| 1472 case InstArithmetic::_num: | 1472 case InstArithmetic::_num: |
| 1473 llvm_unreachable("Unknown arithmetic operator"); | 1473 llvm_unreachable("Unknown arithmetic operator"); |
| 1474 break; | 1474 break; |
| 1475 case InstArithmetic::Add: { | 1475 case InstArithmetic::Add: { |
| 1476 Variable *T = makeReg(Dest->getType()); | 1476 Variable *T = makeReg(Ty); |
| 1477 _movp(T, Src0); | 1477 _movp(T, Src0); |
| 1478 _padd(T, Src1); | 1478 _padd(T, Src1); |
| 1479 _movp(Dest, T); | 1479 _movp(Dest, T); |
| 1480 } break; | 1480 } break; |
| 1481 case InstArithmetic::And: { | 1481 case InstArithmetic::And: { |
| 1482 Variable *T = makeReg(Dest->getType()); | 1482 Variable *T = makeReg(Ty); |
| 1483 _movp(T, Src0); | 1483 _movp(T, Src0); |
| 1484 _pand(T, Src1); | 1484 _pand(T, Src1); |
| 1485 _movp(Dest, T); | 1485 _movp(Dest, T); |
| 1486 } break; | 1486 } break; |
| 1487 case InstArithmetic::Or: { | 1487 case InstArithmetic::Or: { |
| 1488 Variable *T = makeReg(Dest->getType()); | 1488 Variable *T = makeReg(Ty); |
| 1489 _movp(T, Src0); | 1489 _movp(T, Src0); |
| 1490 _por(T, Src1); | 1490 _por(T, Src1); |
| 1491 _movp(Dest, T); | 1491 _movp(Dest, T); |
| 1492 } break; | 1492 } break; |
| 1493 case InstArithmetic::Xor: { | 1493 case InstArithmetic::Xor: { |
| 1494 Variable *T = makeReg(Dest->getType()); | 1494 Variable *T = makeReg(Ty); |
| 1495 _movp(T, Src0); | 1495 _movp(T, Src0); |
| 1496 _pxor(T, Src1); | 1496 _pxor(T, Src1); |
| 1497 _movp(Dest, T); | 1497 _movp(Dest, T); |
| 1498 } break; | 1498 } break; |
| 1499 case InstArithmetic::Sub: { | 1499 case InstArithmetic::Sub: { |
| 1500 Variable *T = makeReg(Dest->getType()); | 1500 Variable *T = makeReg(Ty); |
| 1501 _movp(T, Src0); | 1501 _movp(T, Src0); |
| 1502 _psub(T, Src1); | 1502 _psub(T, Src1); |
| 1503 _movp(Dest, T); | 1503 _movp(Dest, T); |
| 1504 } break; | 1504 } break; |
| 1505 case InstArithmetic::Mul: { | 1505 case InstArithmetic::Mul: { |
| 1506 bool TypesAreValidForPmull = | 1506 bool TypesAreValidForPmull = Ty == IceType_v4i32 || Ty == IceType_v8i16; |
| 1507 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | |
| 1508 bool InstructionSetIsValidForPmull = | 1507 bool InstructionSetIsValidForPmull = |
| 1509 Dest->getType() == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; | 1508 Ty == IceType_v8i16 || InstructionSet >= Traits::SSE4_1; |
| 1510 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1509 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1511 Variable *T = makeReg(Dest->getType()); | 1510 Variable *T = makeReg(Ty); |
| 1512 _movp(T, Src0); | 1511 _movp(T, Src0); |
| 1513 _pmull(T, Src0 == Src1 ? T : Src1); | 1512 _pmull(T, Src0 == Src1 ? T : Src1); |
| 1514 _movp(Dest, T); | 1513 _movp(Dest, T); |
| 1515 } else if (Dest->getType() == IceType_v4i32) { | 1514 } else if (Ty == IceType_v4i32) { |
| 1516 // Lowering sequence: | 1515 // Lowering sequence: |
| 1517 // Note: The mask arguments have index 0 on the left. | 1516 // Note: The mask arguments have index 0 on the left. |
| 1518 // | 1517 // |
| 1519 // movups T1, Src0 | 1518 // movups T1, Src0 |
| 1520 // pshufd T2, Src0, {1,0,3,0} | 1519 // pshufd T2, Src0, {1,0,3,0} |
| 1521 // pshufd T3, Src1, {1,0,3,0} | 1520 // pshufd T3, Src1, {1,0,3,0} |
| 1522 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | 1521 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
| 1523 // pmuludq T1, Src1 | 1522 // pmuludq T1, Src1 |
| 1524 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1523 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
| 1525 // pmuludq T2, T3 | 1524 // pmuludq T2, T3 |
| (...skipping 17 matching lines...) | (...skipping 17 matching lines...) |
| 1543 Variable *T3 = makeReg(IceType_v4i32); | 1542 Variable *T3 = makeReg(IceType_v4i32); |
| 1544 Variable *T4 = makeReg(IceType_v4i32); | 1543 Variable *T4 = makeReg(IceType_v4i32); |
| 1545 _movp(T1, Src0); | 1544 _movp(T1, Src0); |
| 1546 _pshufd(T2, Src0, Mask1030); | 1545 _pshufd(T2, Src0, Mask1030); |
| 1547 _pshufd(T3, Src1, Mask1030); | 1546 _pshufd(T3, Src1, Mask1030); |
| 1548 _pmuludq(T1, Src1); | 1547 _pmuludq(T1, Src1); |
| 1549 _pmuludq(T2, T3); | 1548 _pmuludq(T2, T3); |
| 1550 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | 1549 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); |
| 1551 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); | 1550 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); |
| 1552 _movp(Dest, T4); | 1551 _movp(Dest, T4); |
| 1553 } else if (Dest->getType() == IceType_v16i8) { | 1552 } else if (Ty == IceType_v16i8) { |
| 1554 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1553 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1555 } else { | 1554 } else { |
| 1556 llvm::report_fatal_error("Invalid vector multiply type"); | 1555 llvm::report_fatal_error("Invalid vector multiply type"); |
| 1557 } | 1556 } |
| 1558 } break; | 1557 } break; |
| 1559 case InstArithmetic::Shl: | 1558 case InstArithmetic::Shl: |
| 1560 case InstArithmetic::Lshr: | 1559 case InstArithmetic::Lshr: |
| 1561 case InstArithmetic::Ashr: | 1560 case InstArithmetic::Ashr: |
| 1562 case InstArithmetic::Udiv: | 1561 case InstArithmetic::Udiv: |
| 1563 case InstArithmetic::Urem: | 1562 case InstArithmetic::Urem: |
| 1564 case InstArithmetic::Sdiv: | 1563 case InstArithmetic::Sdiv: |
| 1565 case InstArithmetic::Srem: | 1564 case InstArithmetic::Srem: |
| 1566 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1565 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1567 break; | 1566 break; |
| 1568 case InstArithmetic::Fadd: { | 1567 case InstArithmetic::Fadd: { |
| 1569 Variable *T = makeReg(Dest->getType()); | 1568 Variable *T = makeReg(Ty); |
| 1570 _movp(T, Src0); | 1569 _movp(T, Src0); |
| 1571 _addps(T, Src1); | 1570 _addps(T, Src1); |
| 1572 _movp(Dest, T); | 1571 _movp(Dest, T); |
| 1573 } break; | 1572 } break; |
| 1574 case InstArithmetic::Fsub: { | 1573 case InstArithmetic::Fsub: { |
| 1575 Variable *T = makeReg(Dest->getType()); | 1574 Variable *T = makeReg(Ty); |
| 1576 _movp(T, Src0); | 1575 _movp(T, Src0); |
| 1577 _subps(T, Src1); | 1576 _subps(T, Src1); |
| 1578 _movp(Dest, T); | 1577 _movp(Dest, T); |
| 1579 } break; | 1578 } break; |
| 1580 case InstArithmetic::Fmul: { | 1579 case InstArithmetic::Fmul: { |
| 1581 Variable *T = makeReg(Dest->getType()); | 1580 Variable *T = makeReg(Ty); |
| 1582 _movp(T, Src0); | 1581 _movp(T, Src0); |
| 1583 _mulps(T, Src0 == Src1 ? T : Src1); | 1582 _mulps(T, Src0 == Src1 ? T : Src1); |
| 1584 _movp(Dest, T); | 1583 _movp(Dest, T); |
| 1585 } break; | 1584 } break; |
| 1586 case InstArithmetic::Fdiv: { | 1585 case InstArithmetic::Fdiv: { |
| 1587 Variable *T = makeReg(Dest->getType()); | 1586 Variable *T = makeReg(Ty); |
| 1588 _movp(T, Src0); | 1587 _movp(T, Src0); |
| 1589 _divps(T, Src1); | 1588 _divps(T, Src1); |
| 1590 _movp(Dest, T); | 1589 _movp(Dest, T); |
| 1591 } break; | 1590 } break; |
| 1592 case InstArithmetic::Frem: | 1591 case InstArithmetic::Frem: |
| 1593 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1592 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1594 break; | 1593 break; |
| 1595 } | 1594 } |
| 1596 return; | 1595 return; |
| 1597 } | 1596 } |
| (...skipping 28 matching lines...) | (...skipping 28 matching lines...) |
| 1626 _sub(T, Src1); | 1625 _sub(T, Src1); |
| 1627 _mov(Dest, T); | 1626 _mov(Dest, T); |
| 1628 break; | 1627 break; |
| 1629 case InstArithmetic::Mul: | 1628 case InstArithmetic::Mul: |
| 1630 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1629 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1631 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1630 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
| 1632 return; | 1631 return; |
| 1633 } | 1632 } |
| 1634 // The 8-bit version of imul only allows the form "imul r/m8" where T must | 1633 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
| 1635 // be in al. | 1634 // be in al. |
| 1636 if (isByteSizedArithType(Dest->getType())) { | 1635 if (isByteSizedArithType(Ty)) { |
| 1637 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1636 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1638 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1637 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1639 _imul(T, Src0 == Src1 ? T : Src1); | 1638 _imul(T, Src0 == Src1 ? T : Src1); |
| 1640 _mov(Dest, T); | 1639 _mov(Dest, T); |
| 1641 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1640 } else if (auto *ImmConst = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1642 T = makeReg(Dest->getType()); | 1641 T = makeReg(Ty); |
| 1643 _imul_imm(T, Src0, ImmConst); | 1642 _imul_imm(T, Src0, ImmConst); |
| 1644 _mov(Dest, T); | 1643 _mov(Dest, T); |
| 1645 } else { | 1644 } else { |
| 1646 _mov(T, Src0); | 1645 _mov(T, Src0); |
| 1647 _imul(T, Src0 == Src1 ? T : Src1); | 1646 _imul(T, Src0 == Src1 ? T : Src1); |
| 1648 _mov(Dest, T); | 1647 _mov(Dest, T); |
| 1649 } | 1648 } |
| 1650 break; | 1649 break; |
| 1651 case InstArithmetic::Shl: | 1650 case InstArithmetic::Shl: |
| 1652 _mov(T, Src0); | 1651 _mov(T, Src0); |
| 1653 if (!llvm::isa<ConstantInteger32>(Src1)) { | 1652 if (!llvm::isa<ConstantInteger32>(Src1)) |
| 1654 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 1653 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
| 1655 _mov(Cl, Src1); | |
| 1656 Src1 = Cl; | |
| 1657 } | |
| 1658 _shl(T, Src1); | 1654 _shl(T, Src1); |
| 1659 _mov(Dest, T); | 1655 _mov(Dest, T); |
| 1660 break; | 1656 break; |
| 1661 case InstArithmetic::Lshr: | 1657 case InstArithmetic::Lshr: |
| 1662 _mov(T, Src0); | 1658 _mov(T, Src0); |
| 1663 if (!llvm::isa<ConstantInteger32>(Src1)) { | 1659 if (!llvm::isa<ConstantInteger32>(Src1)) |
| 1664 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 1660 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
| 1665 _mov(Cl, Src1); | |
| 1666 Src1 = Cl; | |
| 1667 } | |
| 1668 _shr(T, Src1); | 1661 _shr(T, Src1); |
| 1669 _mov(Dest, T); | 1662 _mov(Dest, T); |
| 1670 break; | 1663 break; |
| 1671 case InstArithmetic::Ashr: | 1664 case InstArithmetic::Ashr: |
| 1672 _mov(T, Src0); | 1665 _mov(T, Src0); |
| 1673 if (!llvm::isa<ConstantInteger32>(Src1)) { | 1666 if (!llvm::isa<ConstantInteger32>(Src1)) |
| 1674 Variable *Cl = makeReg(IceType_i8, Traits::RegisterSet::Reg_cl); | 1667 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
| 1675 _mov(Cl, Src1); | |
| 1676 Src1 = Cl; | |
| 1677 } | |
| 1678 _sar(T, Src1); | 1668 _sar(T, Src1); |
| 1679 _mov(Dest, T); | 1669 _mov(Dest, T); |
| 1680 break; | 1670 break; |
| 1681 case InstArithmetic::Udiv: | 1671 case InstArithmetic::Udiv: { |
| 1682 // div and idiv are the few arithmetic operators that do not allow | 1672 // div and idiv are the few arithmetic operators that do not allow |
| 1683 // immediates as the operand. | 1673 // immediates as the operand. |
| 1684 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1674 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1685 if (isByteSizedArithType(Dest->getType())) { | 1675 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1686 // For 8-bit unsigned division we need to zero-extend al into ah. A mov | 1676 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1687 // $0, %ah (or xor %ah, %ah) would work just fine, except that the x86-64 | 1677 switch (Ty) { |
| 1688 // assembler refuses to encode %ah (encoding %spl with a REX prefix | 1678 default: |
| 1689 // instead.) Accessing %ah in 64-bit is "tricky" as you can't encode %ah | 1679 llvm_unreachable("Bad type for udiv"); |
| 1690 // with any other 8-bit register except for %a[lh], %b[lh], %c[lh], and | 1680 // fallthrough |
| 1691 // %d[lh], which means the X86 target lowering (and the register | 1681 case IceType_i32: |
| 1692 // allocator) would have to be aware of this restriction. For now, we | 1682 break; |
| 1693 // simply zero %eax completely, and move the dividend into %al. | 1683 case IceType_i16: |
| 1694 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1684 Eax = Traits::RegisterSet::Reg_ax; |
| 1695 Context.insert(InstFakeDef::create(Func, T_eax)); | 1685 Edx = Traits::RegisterSet::Reg_dx; |
| 1696 _xor(T_eax, T_eax); | 1686 break; |
| 1697 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1687 case IceType_i8: |
| 1698 _div(T, Src1, T); | 1688 Eax = Traits::RegisterSet::Reg_al; |
| 1699 _mov(Dest, T); | 1689 Edx = Traits::RegisterSet::Reg_ah; |
| 1700 Context.insert(InstFakeUse::create(Func, T_eax)); | 1690 break; |
| 1701 } else { | |
| 1702 Type Ty = Dest->getType(); | |
| 1703 uint32_t Eax = Traits::RegisterSet::Reg_eax; | |
| 1704 uint32_t Edx = Traits::RegisterSet::Reg_edx; | |
| 1705 switch (Ty) { | |
| 1706 default: | |
| 1707 llvm_unreachable("Bad type for udiv"); | |
| 1708 // fallthrough | |
| 1709 case IceType_i32: | |
| 1710 break; | |
| 1711 case IceType_i16: | |
| 1712 Eax = Traits::RegisterSet::Reg_ax; | |
| 1713 Edx = Traits::RegisterSet::Reg_dx; | |
| 1714 break; | |
| 1715 } | |
| 1716 Constant *Zero = Ctx->getConstantZero(Ty); | |
| 1717 _mov(T, Src0, Eax); | |
| 1718 _mov(T_edx, Zero, Edx); | |
| 1719 _div(T, Src1, T_edx); | |
| 1720 _mov(Dest, T); | |
| 1721 } | 1691 } |
| 1722 break; | 1692 _mov(T, Src0, Eax); |
| 1693 _mov(T_edx, Ctx->getConstantZero(Ty), Edx); |
| 1694 _div(T, Src1, T_edx); |
| 1695 _mov(Dest, T); |
| 1696 } break; |
| 1723 case InstArithmetic::Sdiv: | 1697 case InstArithmetic::Sdiv: |
| 1724 // TODO(stichnot): Enable this after doing better performance and cross | 1698 // TODO(stichnot): Enable this after doing better performance and cross |
| 1725 // testing. | 1699 // testing. |
| 1726 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1700 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1727 // Optimize division by constant power of 2, but not for Om1 or O0, just | 1701 // Optimize division by constant power of 2, but not for Om1 or O0, just |
| 1728 // to keep things simple there. | 1702 // to keep things simple there. |
| 1729 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1703 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1730 int32_t Divisor = C->getValue(); | 1704 int32_t Divisor = C->getValue(); |
| 1731 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | 1705 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
| 1732 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1706 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 1733 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1707 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 1734 Type Ty = Dest->getType(); | |
| 1735 // LLVM does the following for dest=src/(1<<log): | 1708 // LLVM does the following for dest=src/(1<<log): |
| 1736 // t=src | 1709 // t=src |
| 1737 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1710 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 1738 // shr t,typewidth-log | 1711 // shr t,typewidth-log |
| 1739 // add t,src | 1712 // add t,src |
| 1740 // sar t,log | 1713 // sar t,log |
| 1741 // dest=t | 1714 // dest=t |
| 1742 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); | 1715 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 1743 _mov(T, Src0); | 1716 _mov(T, Src0); |
| 1744 // If for some reason we are dividing by 1, just treat it like an | 1717 // If for some reason we are dividing by 1, just treat it like an |
| 1745 // assignment. | 1718 // assignment. |
| 1746 if (LogDiv > 0) { | 1719 if (LogDiv > 0) { |
| 1747 // The initial sar is unnecessary when dividing by 2. | 1720 // The initial sar is unnecessary when dividing by 2. |
| 1748 if (LogDiv > 1) | 1721 if (LogDiv > 1) |
| 1749 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 1722 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 1750 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1723 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 1751 _add(T, Src0); | 1724 _add(T, Src0); |
| 1752 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1725 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| 1753 } | 1726 } |
| 1754 _mov(Dest, T); | 1727 _mov(Dest, T); |
| 1755 return; | 1728 return; |
| 1756 } | 1729 } |
| 1757 } | 1730 } |
| 1758 } | 1731 } |
| 1759 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1732 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1760 switch (Type Ty = Dest->getType()) { | 1733 switch (Ty) { |
| 1761 default: | 1734 default: |
| 1762 llvm_unreachable("Bad type for sdiv"); | 1735 llvm_unreachable("Bad type for sdiv"); |
| 1763 // fallthrough | 1736 // fallthrough |
| 1764 case IceType_i32: | 1737 case IceType_i32: |
| 1765 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 1738 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
| 1766 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1739 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1767 break; | 1740 break; |
| 1768 case IceType_i16: | 1741 case IceType_i16: |
| 1769 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 1742 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1770 _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 1743 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1771 break; | 1744 break; |
| 1772 case IceType_i8: | 1745 case IceType_i8: |
| 1773 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 1746 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1774 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1747 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1775 break; | 1748 break; |
| 1776 } | 1749 } |
| 1777 _cbwdq(T_edx, T); | 1750 _cbwdq(T_edx, T); |
| 1778 _idiv(T, Src1, T_edx); | 1751 _idiv(T, Src1, T_edx); |
| 1779 _mov(Dest, T); | 1752 _mov(Dest, T); |
| 1780 break; | 1753 break; |
| 1781 case InstArithmetic::Urem: | 1754 case InstArithmetic::Urem: { |
| 1782 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1755 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1783 if (isByteSizedArithType(Dest->getType())) { | 1756 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1784 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | 1757 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1785 Context.insert(InstFakeDef::create(Func, T_eax)); | 1758 switch (Ty) { |
| 1786 _xor(T_eax, T_eax); | 1759 default: |
| 1787 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1760 llvm_unreachable("Bad type for urem"); |
| 1788 _div(T, Src1, T); | 1761 // fallthrough |
| 1789 // shr $8, %eax shifts ah (i.e., the 8 bit remainder) into al. We don't | 1762 case IceType_i32: |
| 1790 // mov %ah, %al because it would make x86-64 codegen more complicated. If | 1763 break; |
| 1791 // this ever becomes a problem we can introduce a pseudo rem instruction | 1764 case IceType_i16: |
| 1792 // that returns the remainder in %al directly (and uses a mov for copying | 1765 Eax = Traits::RegisterSet::Reg_ax; |
| 1793 // %ah to %al.) | 1766 Edx = Traits::RegisterSet::Reg_dx; |
| 1794 static constexpr uint8_t AlSizeInBits = 8; | 1767 break; |
| 1795 _shr(T_eax, Ctx->getConstantInt8(AlSizeInBits)); | 1768 case IceType_i8: |
| 1796 _mov(Dest, T); | 1769 Eax = Traits::RegisterSet::Reg_al; |
| 1797 Context.insert(InstFakeUse::create(Func, T_eax)); | 1770 Edx = Traits::RegisterSet::Reg_ah; |
| 1798 } else { | 1771 break; |
| 1799 Type Ty = Dest->getType(); | |
| 1800 uint32_t Eax = Traits::RegisterSet::Reg_eax; | |
| 1801 uint32_t Edx = Traits::RegisterSet::Reg_edx; | |
| 1802 switch (Ty) { | |
| 1803 default: | |
| 1804 llvm_unreachable("Bad type for urem"); | |
| 1805 // fallthrough | |
| 1806 case IceType_i32: | |
| 1807 break; | |
| 1808 case IceType_i16: | |
| 1809 Eax = Traits::RegisterSet::Reg_ax; | |
| 1810 Edx = Traits::RegisterSet::Reg_dx; | |
| 1811 break; | |
| 1812 } | |
| 1813 Constant *Zero = Ctx->getConstantZero(Ty); | |
| 1814 T_edx = makeReg(Dest->getType(), Edx); | |
| 1815 _mov(T_edx, Zero); | |
| 1816 _mov(T, Src0, Eax); | |
| 1817 _div(T_edx, Src1, T); | |
| 1818 _mov(Dest, T_edx); | |
| 1819 } | 1772 } |
| 1820 break; | 1773 T_edx = makeReg(Ty, Edx); |
| 1821 case InstArithmetic::Srem: | 1774 _mov(T_edx, Ctx->getConstantZero(Ty)); |
| 1775 _mov(T, Src0, Eax); |
| 1776 _div(T_edx, Src1, T); |
| 1777 _mov(Dest, T_edx); |
| 1778 } break; |
| 1779 case InstArithmetic::Srem: { |
| 1822 // TODO(stichnot): Enable this after doing better performance and cross | 1780 // TODO(stichnot): Enable this after doing better performance and cross |
| 1823 // testing. | 1781 // testing. |
| 1824 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1782 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1825 // Optimize mod by constant power of 2, but not for Om1 or O0, just to | 1783 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
| 1826 // keep things simple there. | 1784 // keep things simple there. |
| 1827 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1785 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1828 int32_t Divisor = C->getValue(); | 1786 int32_t Divisor = C->getValue(); |
| 1829 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | 1787 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
| 1830 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1788 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 1831 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1789 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 1832 Type Ty = Dest->getType(); | |
| 1833 // LLVM does the following for dest=src%(1<<log): | 1790 // LLVM does the following for dest=src%(1<<log): |
| 1834 // t=src | 1791 // t=src |
| 1835 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1792 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 1836 // shr t,typewidth-log | 1793 // shr t,typewidth-log |
| 1837 // add t,src | 1794 // add t,src |
| 1838 // and t, -(1<<log) | 1795 // and t, -(1<<log) |
| 1839 // sub t,src | 1796 // sub t,src |
| 1840 // neg t | 1797 // neg t |
| 1841 // dest=t | 1798 // dest=t |
| 1842 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); | 1799 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| (...skipping 10 matching lines...) | (...skipping 10 matching lines...) |
| 1853 _add(T, Src0); | 1810 _add(T, Src0); |
| 1854 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1811 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 1855 _sub(T, Src0); | 1812 _sub(T, Src0); |
| 1856 _neg(T); | 1813 _neg(T); |
| 1857 _mov(Dest, T); | 1814 _mov(Dest, T); |
| 1858 return; | 1815 return; |
| 1859 } | 1816 } |
| 1860 } | 1817 } |
| 1861 } | 1818 } |
| 1862 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1819 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1863 switch (Type Ty = Dest->getType()) { | 1820 uint32_t Eax = Traits::RegisterSet::Reg_eax; |
| 1821 uint32_t Edx = Traits::RegisterSet::Reg_edx; |
| 1822 switch (Ty) { |
| 1864 default: | 1823 default: |
| 1865 llvm_unreachable("Bad type for srem"); | 1824 llvm_unreachable("Bad type for srem"); |
| 1866 // fallthrough | 1825 // fallthrough |
| 1867 case IceType_i32: | 1826 case IceType_i32: |
| 1868 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | |
| 1869 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | |
| 1870 _cbwdq(T_edx, T); | |
| 1871 _idiv(T_edx, Src1, T); | |
| 1872 _mov(Dest, T_edx); | |
| 1873 break; | 1827 break; |
| 1874 case IceType_i16: | 1828 case IceType_i16: |
| 1875 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 1829 Eax = Traits::RegisterSet::Reg_ax; |
| 1876 _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 1830 Edx = Traits::RegisterSet::Reg_dx; |
| 1877 _cbwdq(T_edx, T); | |
| 1878 _idiv(T_edx, Src1, T); | |
| 1879 _mov(Dest, T_edx); | |
| 1880 break; | 1831 break; |
| 1881 case IceType_i8: | 1832 case IceType_i8: |
| 1882 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 1833 Eax = Traits::RegisterSet::Reg_al; |
| 1883 // TODO(stichnot): Use register ah for T_edx, and remove the _shr(). | 1834 Edx = Traits::RegisterSet::Reg_ah; |
| 1884 // T_edx = makeReg(Ty, Traits::RegisterSet::Reg_ah); | |
| 1885 _mov(T, Src0, Traits::RegisterSet::Reg_al); | |
| 1886 _cbwdq(T_edx, T); | |
| 1887 _idiv(T_edx, Src1, T); | |
| 1888 static constexpr uint8_t AlSizeInBits = 8; | |
| 1889 _shr(T_edx, Ctx->getConstantInt8(AlSizeInBits)); | |
| 1890 _mov(Dest, T_edx); | |
| 1891 break; | 1835 break; |
| 1892 } | 1836 } |
| 1893 break; | 1837 T_edx = makeReg(Ty, Edx); |
| 1838 _mov(T, Src0, Eax); |
| 1839 _cbwdq(T_edx, T); |
| 1840 _idiv(T_edx, Src1, T); |
| 1841 _mov(Dest, T_edx); |
| 1842 } break; |
| 1894 case InstArithmetic::Fadd: | 1843 case InstArithmetic::Fadd: |
| 1895 _mov(T, Src0); | 1844 _mov(T, Src0); |
| 1896 _addss(T, Src1); | 1845 _addss(T, Src1); |
| 1897 _mov(Dest, T); | 1846 _mov(Dest, T); |
| 1898 break; | 1847 break; |
| 1899 case InstArithmetic::Fsub: | 1848 case InstArithmetic::Fsub: |
| 1900 _mov(T, Src0); | 1849 _mov(T, Src0); |
| 1901 _subss(T, Src1); | 1850 _subss(T, Src1); |
| 1902 _mov(Dest, T); | 1851 _mov(Dest, T); |
| 1903 break; | 1852 break; |
| 1904 case InstArithmetic::Fmul: | 1853 case InstArithmetic::Fmul: |
| 1905 _mov(T, Src0); | 1854 _mov(T, Src0); |
| 1906 _mulss(T, Src0 == Src1 ? T : Src1); | 1855 _mulss(T, Src0 == Src1 ? T : Src1); |
| 1907 _mov(Dest, T); | 1856 _mov(Dest, T); |
| 1908 break; | 1857 break; |
| 1909 case InstArithmetic::Fdiv: | 1858 case InstArithmetic::Fdiv: |
| 1910 _mov(T, Src0); | 1859 _mov(T, Src0); |
| 1911 _divss(T, Src1); | 1860 _divss(T, Src1); |
| 1912 _mov(Dest, T); | 1861 _mov(Dest, T); |
| 1913 break; | 1862 break; |
| 1914 case InstArithmetic::Frem: { | 1863 case InstArithmetic::Frem: { |
| 1915 constexpr SizeT MaxSrcs = 2; | 1864 constexpr SizeT MaxSrcs = 2; |
| 1916 Type Ty = Dest->getType(); | |
| 1917 InstCall *Call = makeHelperCall( | 1865 InstCall *Call = makeHelperCall( |
| 1918 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 1866 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
| 1919 Call->addArg(Src0); | 1867 Call->addArg(Src0); |
| 1920 Call->addArg(Src1); | 1868 Call->addArg(Src1); |
| 1921 return lowerCall(Call); | 1869 return lowerCall(Call); |
| 1922 } | 1870 } |
| 1923 } | 1871 } |
| 1924 } | 1872 } |
| 1925 | 1873 |
| 1926 template <class Machine> | 1874 template <class Machine> |
| (...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1984 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1932 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1985 _cmp(Src0, Zero); | 1933 _cmp(Src0, Zero); |
| 1986 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1934 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 1987 } | 1935 } |
| 1988 | 1936 |
| 1989 template <class Machine> | 1937 template <class Machine> |
| 1990 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 1938 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| 1991 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1939 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 1992 InstCast::OpKind CastKind = Inst->getCastKind(); | 1940 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 1993 Variable *Dest = Inst->getDest(); | 1941 Variable *Dest = Inst->getDest(); |
| 1942 Type DestTy = Dest->getType(); |
| 1994 switch (CastKind) { | 1943 switch (CastKind) { |
| 1995 default: | 1944 default: |
| 1996 Func->setError("Cast type not supported"); | 1945 Func->setError("Cast type not supported"); |
| 1997 return; | 1946 return; |
| 1998 case InstCast::Sext: { | 1947 case InstCast::Sext: { |
| 1999 // Src0RM is the source operand legalized to physical register or memory, | 1948 // Src0RM is the source operand legalized to physical register or memory, |
| 2000 // but not immediate, since the relevant x86 native instructions don't | 1949 // but not immediate, since the relevant x86 native instructions don't |
| 2001 // allow an immediate operand. If the operand is an immediate, we could | 1950 // allow an immediate operand. If the operand is an immediate, we could |
| 2002 // consider computing the strength-reduced result at translation time, but | 1951 // consider computing the strength-reduced result at translation time, but |
| 2003 // we're unlikely to see something like that in the bitcode that the | 1952 // we're unlikely to see something like that in the bitcode that the |
| 2004 // optimizer wouldn't have already taken care of. | 1953 // optimizer wouldn't have already taken care of. |
| 2005 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1954 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2006 if (isVectorType(Dest->getType())) { | 1955 if (isVectorType(DestTy)) { |
| 2007 Type DestTy = Dest->getType(); | |
| 2008 if (DestTy == IceType_v16i8) { | 1956 if (DestTy == IceType_v16i8) { |
| 2009 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 1957 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2010 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 1958 Variable *OneMask = makeVectorOfOnes(DestTy); |
| 2011 Variable *T = makeReg(DestTy); | 1959 Variable *T = makeReg(DestTy); |
| 2012 _movp(T, Src0RM); | 1960 _movp(T, Src0RM); |
| 2013 _pand(T, OneMask); | 1961 _pand(T, OneMask); |
| 2014 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 1962 Variable *Zeros = makeVectorOfZeros(DestTy); |
| 2015 _pcmpgt(T, Zeros); | 1963 _pcmpgt(T, Zeros); |
| 2016 _movp(Dest, T); | 1964 _movp(Dest, T); |
| 2017 } else { | 1965 } else { |
| 2018 /// width = width(elty) - 1; dest = (src << width) >> width | 1966 /// width = width(elty) - 1; dest = (src << width) >> width |
| 2019 SizeT ShiftAmount = | 1967 SizeT ShiftAmount = |
| 2020 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 1968 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2021 1; | 1969 1; |
| 2022 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 1970 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2023 Variable *T = makeReg(DestTy); | 1971 Variable *T = makeReg(DestTy); |
| 2024 _movp(T, Src0RM); | 1972 _movp(T, Src0RM); |
| 2025 _psll(T, ShiftConstant); | 1973 _psll(T, ShiftConstant); |
| 2026 _psra(T, ShiftConstant); | 1974 _psra(T, ShiftConstant); |
| 2027 _movp(Dest, T); | 1975 _movp(Dest, T); |
| 2028 } | 1976 } |
| 2029 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1977 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2030 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 1978 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
| 2031 Constant *Shift = Ctx->getConstantInt32(31); | 1979 Constant *Shift = Ctx->getConstantInt32(31); |
| 2032 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1980 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2033 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1981 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2034 Variable *T_Lo = makeReg(DestLo->getType()); | 1982 Variable *T_Lo = makeReg(DestLo->getType()); |
| 2035 if (Src0RM->getType() == IceType_i32) { | 1983 if (Src0RM->getType() == IceType_i32) { |
| 2036 _mov(T_Lo, Src0RM); | 1984 _mov(T_Lo, Src0RM); |
| 2037 } else if (Src0RM->getType() == IceType_i1) { | 1985 } else if (Src0RM->getType() == IceType_i1) { |
| 2038 _movzx(T_Lo, Src0RM); | 1986 _movzx(T_Lo, Src0RM); |
| 2039 _shl(T_Lo, Shift); | 1987 _shl(T_Lo, Shift); |
| 2040 _sar(T_Lo, Shift); | 1988 _sar(T_Lo, Shift); |
| 2041 } else { | 1989 } else { |
| 2042 _movsx(T_Lo, Src0RM); | 1990 _movsx(T_Lo, Src0RM); |
| 2043 } | 1991 } |
| 2044 _mov(DestLo, T_Lo); | 1992 _mov(DestLo, T_Lo); |
| 2045 Variable *T_Hi = nullptr; | 1993 Variable *T_Hi = nullptr; |
| 2046 _mov(T_Hi, T_Lo); | 1994 _mov(T_Hi, T_Lo); |
| 2047 if (Src0RM->getType() != IceType_i1) | 1995 if (Src0RM->getType() != IceType_i1) |
| 2048 // For i1, the sar instruction is already done above. | 1996 // For i1, the sar instruction is already done above. |
| 2049 _sar(T_Hi, Shift); | 1997 _sar(T_Hi, Shift); |
| 2050 _mov(DestHi, T_Hi); | 1998 _mov(DestHi, T_Hi); |
| 2051 } else if (Src0RM->getType() == IceType_i1) { | 1999 } else if (Src0RM->getType() == IceType_i1) { |
| 2052 // t1 = src | 2000 // t1 = src |
| 2053 // shl t1, dst_bitwidth - 1 | 2001 // shl t1, dst_bitwidth - 1 |
| 2054 // sar t1, dst_bitwidth - 1 | 2002 // sar t1, dst_bitwidth - 1 |
| 2055 // dst = t1 | 2003 // dst = t1 |
| 2056 size_t DestBits = | 2004 size_t DestBits = Traits::X86_CHAR_BIT * typeWidthInBytes(DestTy); |
| 2057 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | |
| 2058 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); | 2005 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); |
| 2059 Variable *T = makeReg(Dest->getType()); | 2006 Variable *T = makeReg(DestTy); |
| 2060 if (typeWidthInBytes(Dest->getType()) <= | 2007 if (typeWidthInBytes(DestTy) <= typeWidthInBytes(Src0RM->getType())) { |
| 2061 typeWidthInBytes(Src0RM->getType())) { | |
| 2062 _mov(T, Src0RM); | 2008 _mov(T, Src0RM); |
| 2063 } else { | 2009 } else { |
| 2064 // Widen the source using movsx or movzx. (It doesn't matter which one, | 2010 // Widen the source using movsx or movzx. (It doesn't matter which one, |
| 2065 // since the following shl/sar overwrite the bits.) | 2011 // since the following shl/sar overwrite the bits.) |
| 2066 _movzx(T, Src0RM); | 2012 _movzx(T, Src0RM); |
| 2067 } | 2013 } |
| 2068 _shl(T, ShiftAmount); | 2014 _shl(T, ShiftAmount); |
| 2069 _sar(T, ShiftAmount); | 2015 _sar(T, ShiftAmount); |
| 2070 _mov(Dest, T); | 2016 _mov(Dest, T); |
| 2071 } else { | 2017 } else { |
| 2072 // t1 = movsx src; dst = t1 | 2018 // t1 = movsx src; dst = t1 |
| 2073 Variable *T = makeReg(Dest->getType()); | 2019 Variable *T = makeReg(DestTy); |
| 2074 _movsx(T, Src0RM); | 2020 _movsx(T, Src0RM); |
| 2075 _mov(Dest, T); | 2021 _mov(Dest, T); |
| 2076 } | 2022 } |
| 2077 break; | 2023 break; |
| 2078 } | 2024 } |
| 2079 case InstCast::Zext: { | 2025 case InstCast::Zext: { |
| 2080 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2026 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2081 if (isVectorType(Dest->getType())) { | 2027 if (isVectorType(DestTy)) { |
| 2082 // onemask = materialize(1,1,...); dest = onemask & src | 2028 // onemask = materialize(1,1,...); dest = onemask & src |
| 2083 Type DestTy = Dest->getType(); | |
| 2084 Variable *OneMask = makeVectorOfOnes(DestTy); | 2029 Variable *OneMask = makeVectorOfOnes(DestTy); |
| 2085 Variable *T = makeReg(DestTy); | 2030 Variable *T = makeReg(DestTy); |
| 2086 _movp(T, Src0RM); | 2031 _movp(T, Src0RM); |
| 2087 _pand(T, OneMask); | 2032 _pand(T, OneMask); |
| 2088 _movp(Dest, T); | 2033 _movp(Dest, T); |
| 2089 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2034 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2090 // t1=movzx src; dst.lo=t1; dst.hi=0 | 2035 // t1=movzx src; dst.lo=t1; dst.hi=0 |
| 2091 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2036 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2037 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2038 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2094 Variable *Tmp = makeReg(DestLo->getType()); | 2039 Variable *Tmp = makeReg(DestLo->getType()); |
| 2095 if (Src0RM->getType() == IceType_i32) { | 2040 if (Src0RM->getType() == IceType_i32) { |
| 2096 _mov(Tmp, Src0RM); | 2041 _mov(Tmp, Src0RM); |
| 2097 } else { | 2042 } else { |
| 2098 _movzx(Tmp, Src0RM); | 2043 _movzx(Tmp, Src0RM); |
| 2099 } | 2044 } |
| 2100 _mov(DestLo, Tmp); | 2045 _mov(DestLo, Tmp); |
| 2101 _mov(DestHi, Zero); | 2046 _mov(DestHi, Zero); |
| 2102 } else if (Src0RM->getType() == IceType_i1) { | 2047 } else if (Src0RM->getType() == IceType_i1) { |
| 2103 // t = Src0RM; Dest = t | 2048 // t = Src0RM; Dest = t |
| 2104 Type DestTy = Dest->getType(); | |
| 2105 Variable *T = nullptr; | 2049 Variable *T = nullptr; |
| 2106 if (DestTy == IceType_i8) { | 2050 if (DestTy == IceType_i8) { |
| 2107 _mov(T, Src0RM); | 2051 _mov(T, Src0RM); |
| 2108 } else { | 2052 } else { |
| 2109 assert(DestTy != IceType_i1); | 2053 assert(DestTy != IceType_i1); |
| 2110 assert(Traits::Is64Bit || DestTy != IceType_i64); | 2054 assert(Traits::Is64Bit || DestTy != IceType_i64); |
| 2111 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. | 2055 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. |
| 2112 // In x86-64 we need to widen T to 64-bits to ensure that T -- if | 2056 // In x86-64 we need to widen T to 64-bits to ensure that T -- if |
| 2113 // written to the stack (i.e., in -Om1) will be fully zero-extended. | 2057 // written to the stack (i.e., in -Om1) will be fully zero-extended. |
| 2114 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32); | 2058 T = makeReg(DestTy == IceType_i64 ? IceType_i64 : IceType_i32); |
| 2115 _movzx(T, Src0RM); | 2059 _movzx(T, Src0RM); |
| 2116 } | 2060 } |
| 2117 _mov(Dest, T); | 2061 _mov(Dest, T); |
| 2118 } else { | 2062 } else { |
| 2119 // t1 = movzx src; dst = t1 | 2063 // t1 = movzx src; dst = t1 |
| 2120 Variable *T = makeReg(Dest->getType()); | 2064 Variable *T = makeReg(DestTy); |
| 2121 _movzx(T, Src0RM); | 2065 _movzx(T, Src0RM); |
| 2122 _mov(Dest, T); | 2066 _mov(Dest, T); |
| 2123 } | 2067 } |
| 2124 break; | 2068 break; |
| 2125 } | 2069 } |
| 2126 case InstCast::Trunc: { | 2070 case InstCast::Trunc: { |
| 2127 if (isVectorType(Dest->getType())) { | 2071 if (isVectorType(DestTy)) { |
| 2128 // onemask = materialize(1,1,...); dst = src & onemask | 2072 // onemask = materialize(1,1,...); dst = src & onemask |
| 2129 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2073 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2130 Type Src0Ty = Src0RM->getType(); | 2074 Type Src0Ty = Src0RM->getType(); |
| 2131 Variable *OneMask = makeVectorOfOnes(Src0Ty); | 2075 Variable *OneMask = makeVectorOfOnes(Src0Ty); |
| 2132 Variable *T = makeReg(Dest->getType()); | 2076 Variable *T = makeReg(DestTy); |
| 2133 _movp(T, Src0RM); | 2077 _movp(T, Src0RM); |
| 2134 _pand(T, OneMask); | 2078 _pand(T, OneMask); |
| 2135 _movp(Dest, T); | 2079 _movp(Dest, T); |
| 2080 } else if (DestTy == IceType_i1 || DestTy == IceType_i8) { |
| 2081 // Make sure we truncate from and into valid registers. |
| 2082 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 2083 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) |
| 2084 Src0 = loOperand(Src0); |
| 2085 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2086 Variable *T = copyToReg8(Src0RM); |
| 2087 if (DestTy == IceType_i1) |
| 2088 _and(T, Ctx->getConstantInt1(1)); |
| 2089 _mov(Dest, T); |
| 2136 } else { | 2090 } else { |
| 2137 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2091 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 2138 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) | 2092 if (!Traits::Is64Bit && Src0->getType() == IceType_i64) |
| 2139 Src0 = loOperand(Src0); | 2093 Src0 = loOperand(Src0); |
| 2140 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2094 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2141 // t1 = trunc Src0RM; Dest = t1 | 2095 // t1 = trunc Src0RM; Dest = t1 |
| 2142 Variable *T = nullptr; | 2096 Variable *T = makeReg(DestTy); |
| 2143 _mov(T, Src0RM); | 2097 _mov(T, Src0RM); |
| 2144 if (Dest->getType() == IceType_i1) | |
| 2145 _and(T, Ctx->getConstantInt1(1)); | |
| 2146 _mov(Dest, T); | 2098 _mov(Dest, T); |
| 2147 } | 2099 } |
| 2148 break; | 2100 break; |
| 2149 } | 2101 } |
| 2150 case InstCast::Fptrunc: | 2102 case InstCast::Fptrunc: |
| 2151 case InstCast::Fpext: { | 2103 case InstCast::Fpext: { |
| 2152 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2104 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2153 // t1 = cvt Src0RM; Dest = t1 | 2105 // t1 = cvt Src0RM; Dest = t1 |
| 2154 Variable *T = makeReg(Dest->getType()); | 2106 Variable *T = makeReg(DestTy); |
| 2155 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); | 2107 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float); |
| 2156 _mov(Dest, T); | 2108 _mov(Dest, T); |
| 2157 break; | 2109 break; |
| 2158 } | 2110 } |
| 2159 case InstCast::Fptosi: | 2111 case InstCast::Fptosi: |
| 2160 if (isVectorType(Dest->getType())) { | 2112 if (isVectorType(DestTy)) { |
| 2161 assert(Dest->getType() == IceType_v4i32 && | 2113 assert(DestTy == IceType_v4i32 && |
| 2162 Inst->getSrc(0)->getType() == IceType_v4f32); | 2114 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2163 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2115 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2164 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2116 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2165 Src0RM = legalizeToReg(Src0RM); | 2117 Src0RM = legalizeToReg(Src0RM); |
| 2166 Variable *T = makeReg(Dest->getType()); | 2118 Variable *T = makeReg(DestTy); |
| 2167 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2119 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2168 _movp(Dest, T); | 2120 _movp(Dest, T); |
| 2169 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2121 } else if (!Traits::Is64Bit && DestTy == IceType_i64) { |
| 2170 constexpr SizeT MaxSrcs = 1; | 2122 constexpr SizeT MaxSrcs = 1; |
| 2171 Type SrcType = Inst->getSrc(0)->getType(); | 2123 Type SrcType = Inst->getSrc(0)->getType(); |
| 2172 InstCall *Call = | 2124 InstCall *Call = |
| 2173 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2125 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 2174 : H_fptosi_f64_i64, | 2126 : H_fptosi_f64_i64, |
| 2175 Dest, MaxSrcs); | 2127 Dest, MaxSrcs); |
| 2176 Call->addArg(Inst->getSrc(0)); | 2128 Call->addArg(Inst->getSrc(0)); |
| 2177 lowerCall(Call); | 2129 lowerCall(Call); |
| 2178 } else { | 2130 } else { |
| 2179 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2131 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2180 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2132 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2181 Variable *T_1 = nullptr; | 2133 Variable *T_1 = nullptr; |
| 2182 if (Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2134 if (Traits::Is64Bit && DestTy == IceType_i64) { |
| 2183 T_1 = makeReg(IceType_i64); | 2135 T_1 = makeReg(IceType_i64); |
| 2184 } else { | 2136 } else { |
| 2185 assert(Dest->getType() != IceType_i64); | 2137 assert(DestTy != IceType_i64); |
| 2186 T_1 = makeReg(IceType_i32); | 2138 T_1 = makeReg(IceType_i32); |
| 2187 } | 2139 } |
| 2188 // cvt() requires its integer argument to be a GPR. | 2140 // cvt() requires its integer argument to be a GPR. |
| 2189 Variable *T_2 = makeReg(Dest->getType()); | 2141 Variable *T_2 = makeReg(DestTy); |
| 2142 if (isByteSizedType(DestTy)) { |
| 2143 assert(T_1->getType() == IceType_i32); |
| 2144 T_1->setRegClass(RCX86_Is32To8); |
| 2145 T_2->setRegClass(RCX86_IsTrunc8Rcvr); |
| 2146 } |
| 2190 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2147 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2191 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2148 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2192 if (Dest->getType() == IceType_i1) | 2149 if (DestTy == IceType_i1) |
| 2193 _and(T_2, Ctx->getConstantInt1(1)); | 2150 _and(T_2, Ctx->getConstantInt1(1)); |
| 2194 _mov(Dest, T_2); | 2151 _mov(Dest, T_2); |
| 2195 } | 2152 } |
| 2196 break; | 2153 break; |
| 2197 case InstCast::Fptoui: | 2154 case InstCast::Fptoui: |
| 2198 if (isVectorType(Dest->getType())) { | 2155 if (isVectorType(DestTy)) { |
| 2199 assert(Dest->getType() == IceType_v4i32 && | 2156 assert(DestTy == IceType_v4i32 && |
| 2200 Inst->getSrc(0)->getType() == IceType_v4f32); | 2157 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2201 constexpr SizeT MaxSrcs = 1; | 2158 constexpr SizeT MaxSrcs = 1; |
| 2202 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2159 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
| 2203 Call->addArg(Inst->getSrc(0)); | 2160 Call->addArg(Inst->getSrc(0)); |
| 2204 lowerCall(Call); | 2161 lowerCall(Call); |
| 2205 } else if (Dest->getType() == IceType_i64 || | 2162 } else if (DestTy == IceType_i64 || |
| 2206 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2163 (!Traits::Is64Bit && DestTy == IceType_i32)) { |
| 2207 // Use a helper for both x86-32 and x86-64. | 2164 // Use a helper for both x86-32 and x86-64. |
| 2208 constexpr SizeT MaxSrcs = 1; | 2165 constexpr SizeT MaxSrcs = 1; |
| 2209 Type DestType = Dest->getType(); | |
| 2210 Type SrcType = Inst->getSrc(0)->getType(); | 2166 Type SrcType = Inst->getSrc(0)->getType(); |
| 2211 IceString TargetString; | 2167 IceString TargetString; |
| 2212 if (Traits::Is64Bit) { | 2168 if (Traits::Is64Bit) { |
| 2213 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2169 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2214 : H_fptoui_f64_i64; | 2170 : H_fptoui_f64_i64; |
| 2215 } else if (isInt32Asserting32Or64(DestType)) { | 2171 } else if (isInt32Asserting32Or64(DestTy)) { |
| 2216 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2172 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
| 2217 : H_fptoui_f64_i32; | 2173 : H_fptoui_f64_i32; |
| 2218 } else { | 2174 } else { |
| 2219 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2175 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2220 : H_fptoui_f64_i64; | 2176 : H_fptoui_f64_i64; |
| 2221 } | 2177 } |
| 2222 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2178 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2223 Call->addArg(Inst->getSrc(0)); | 2179 Call->addArg(Inst->getSrc(0)); |
| 2224 lowerCall(Call); | 2180 lowerCall(Call); |
| 2225 return; | 2181 return; |
| 2226 } else { | 2182 } else { |
| 2227 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2183 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2228 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2184 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
| 2229 assert(Dest->getType() != IceType_i64); | 2185 assert(DestTy != IceType_i64); |
| 2230 Variable *T_1 = nullptr; | 2186 Variable *T_1 = nullptr; |
| 2231 if (Traits::Is64Bit && Dest->getType() == IceType_i32) { | 2187 if (Traits::Is64Bit && DestTy == IceType_i32) { |
| 2232 T_1 = makeReg(IceType_i64); | 2188 T_1 = makeReg(IceType_i64); |
| 2233 } else { | 2189 } else { |
| 2234 assert(Dest->getType() != IceType_i32); | 2190 assert(DestTy != IceType_i32); |
| 2235 T_1 = makeReg(IceType_i32); | 2191 T_1 = makeReg(IceType_i32); |
| 2236 } | 2192 } |
| 2237 Variable *T_2 = makeReg(Dest->getType()); | 2193 Variable *T_2 = makeReg(DestTy); |
| 2194 if (isByteSizedType(DestTy)) { |
| 2195 assert(T_1->getType() == IceType_i32); |
| 2196 T_1->setRegClass(RCX86_Is32To8); |
| 2197 T_2->setRegClass(RCX86_IsTrunc8Rcvr); |
| 2198 } |
| 2238 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); | 2199 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si); |
| 2239 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2200 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
| 2240 if (Dest->getType() == IceType_i1) | 2201 if (DestTy == IceType_i1) |
| 2241 _and(T_2, Ctx->getConstantInt1(1)); | 2202 _and(T_2, Ctx->getConstantInt1(1)); |
| 2242 _mov(Dest, T_2); | 2203 _mov(Dest, T_2); |
| 2243 } | 2204 } |
| 2244 break; | 2205 break; |
| 2245 case InstCast::Sitofp: | 2206 case InstCast::Sitofp: |
| 2246 if (isVectorType(Dest->getType())) { | 2207 if (isVectorType(DestTy)) { |
| 2247 assert(Dest->getType() == IceType_v4f32 && | 2208 assert(DestTy == IceType_v4f32 && |
| 2248 Inst->getSrc(0)->getType() == IceType_v4i32); | 2209 Inst->getSrc(0)->getType() == IceType_v4i32); |
| 2249 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2210 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2250 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2211 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2251 Src0RM = legalizeToReg(Src0RM); | 2212 Src0RM = legalizeToReg(Src0RM); |
| 2252 Variable *T = makeReg(Dest->getType()); | 2213 Variable *T = makeReg(DestTy); |
| 2253 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); | 2214 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); |
| 2254 _movp(Dest, T); | 2215 _movp(Dest, T); |
| 2255 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { | 2216 } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) { |
| 2256 // Use a helper for x86-32. | 2217 // Use a helper for x86-32. |
| 2257 constexpr SizeT MaxSrcs = 1; | 2218 constexpr SizeT MaxSrcs = 1; |
| 2258 Type DestType = Dest->getType(); | |
| 2259 InstCall *Call = | 2219 InstCall *Call = |
| 2260 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | 2220 makeHelperCall(isFloat32Asserting32Or64(DestTy) ? H_sitofp_i64_f32 |
| 2261 : H_sitofp_i64_f64, | 2221 : H_sitofp_i64_f64, |
| 2262 Dest, MaxSrcs); | 2222 Dest, MaxSrcs); |
| 2263 // TODO: Call the correct compiler-rt helper function. | 2223 // TODO: Call the correct compiler-rt helper function. |
| 2264 Call->addArg(Inst->getSrc(0)); | 2224 Call->addArg(Inst->getSrc(0)); |
| 2265 lowerCall(Call); | 2225 lowerCall(Call); |
| 2266 return; | 2226 return; |
| 2267 } else { | 2227 } else { |
| 2268 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2228 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2269 // Sign-extend the operand. | 2229 // Sign-extend the operand. |
| 2270 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2230 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2271 Variable *T_1 = nullptr; | 2231 Variable *T_1 = nullptr; |
| 2272 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { | 2232 if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) { |
| 2273 T_1 = makeReg(IceType_i64); | 2233 T_1 = makeReg(IceType_i64); |
| 2274 } else { | 2234 } else { |
| 2275 assert(Src0RM->getType() != IceType_i64); | 2235 assert(Src0RM->getType() != IceType_i64); |
| 2276 T_1 = makeReg(IceType_i32); | 2236 T_1 = makeReg(IceType_i32); |
| 2277 } | 2237 } |
| 2278 Variable *T_2 = makeReg(Dest->getType()); | 2238 Variable *T_2 = makeReg(DestTy); |
| 2279 if (Src0RM->getType() == T_1->getType()) | 2239 if (Src0RM->getType() == T_1->getType()) |
| 2280 _mov(T_1, Src0RM); | 2240 _mov(T_1, Src0RM); |
| 2281 else | 2241 else |
| 2282 _movsx(T_1, Src0RM); | 2242 _movsx(T_1, Src0RM); |
| 2283 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2243 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2284 _mov(Dest, T_2); | 2244 _mov(Dest, T_2); |
| 2285 } | 2245 } |
| 2286 break; | 2246 break; |
| 2287 case InstCast::Uitofp: { | 2247 case InstCast::Uitofp: { |
| 2288 Operand *Src0 = Inst->getSrc(0); | 2248 Operand *Src0 = Inst->getSrc(0); |
| 2289 if (isVectorType(Src0->getType())) { | 2249 if (isVectorType(Src0->getType())) { |
| 2290 assert(Dest->getType() == IceType_v4f32 && | 2250 assert(DestTy == IceType_v4f32 && Src0->getType() == IceType_v4i32); |
| 2291 Src0->getType() == IceType_v4i32); | |
| 2292 constexpr SizeT MaxSrcs = 1; | 2251 constexpr SizeT MaxSrcs = 1; |
| 2293 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2252 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
| 2294 Call->addArg(Src0); | 2253 Call->addArg(Src0); |
| 2295 lowerCall(Call); | 2254 lowerCall(Call); |
| 2296 } else if (Src0->getType() == IceType_i64 || | 2255 } else if (Src0->getType() == IceType_i64 || |
| 2297 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2256 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| 2298 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on | 2257 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
| 2299 // x86-32. | 2258 // x86-32. |
| 2300 constexpr SizeT MaxSrcs = 1; | 2259 constexpr SizeT MaxSrcs = 1; |
| 2301 Type DestType = Dest->getType(); | |
| 2302 IceString TargetString; | 2260 IceString TargetString; |
| 2303 if (isInt32Asserting32Or64(Src0->getType())) { | 2261 if (isInt32Asserting32Or64(Src0->getType())) { |
| 2304 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2262 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i32_f32 |
| 2305 : H_uitofp_i32_f64; | 2263 : H_uitofp_i32_f64; |
| 2306 } else { | 2264 } else { |
| 2307 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2265 TargetString = isFloat32Asserting32Or64(DestTy) ? H_uitofp_i64_f32 |
| 2308 : H_uitofp_i64_f64; | 2266 : H_uitofp_i64_f64; |
| 2309 } | 2267 } |
| 2310 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | 2268 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); |
| 2311 Call->addArg(Src0); | 2269 Call->addArg(Src0); |
| 2312 lowerCall(Call); | 2270 lowerCall(Call); |
| 2313 return; | 2271 return; |
| 2314 } else { | 2272 } else { |
| 2315 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2273 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2316 // Zero-extend the operand. | 2274 // Zero-extend the operand. |
| 2317 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | 2275 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 |
| 2318 Variable *T_1 = nullptr; | 2276 Variable *T_1 = nullptr; |
| 2319 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { | 2277 if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) { |
| 2320 T_1 = makeReg(IceType_i64); | 2278 T_1 = makeReg(IceType_i64); |
| 2321 } else { | 2279 } else { |
| 2322 assert(Src0RM->getType() != IceType_i64); | 2280 assert(Src0RM->getType() != IceType_i64); |
| 2323 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); | 2281 assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32); |
| 2324 T_1 = makeReg(IceType_i32); | 2282 T_1 = makeReg(IceType_i32); |
| 2325 } | 2283 } |
| 2326 Variable *T_2 = makeReg(Dest->getType()); | 2284 Variable *T_2 = makeReg(DestTy); |
| 2327 if (Src0RM->getType() == T_1->getType()) | 2285 if (Src0RM->getType() == T_1->getType()) |
| 2328 _mov(T_1, Src0RM); | 2286 _mov(T_1, Src0RM); |
| 2329 else | 2287 else |
| 2330 _movzx(T_1, Src0RM); | 2288 _movzx(T_1, Src0RM); |
| 2331 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); | 2289 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss); |
| 2332 _mov(Dest, T_2); | 2290 _mov(Dest, T_2); |
| 2333 } | 2291 } |
| 2334 break; | 2292 break; |
| 2335 } | 2293 } |
| 2336 case InstCast::Bitcast: { | 2294 case InstCast::Bitcast: { |
| 2337 Operand *Src0 = Inst->getSrc(0); | 2295 Operand *Src0 = Inst->getSrc(0); |
| 2338 if (Dest->getType() == Src0->getType()) { | 2296 if (DestTy == Src0->getType()) { |
| 2339 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | 2297 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
| 2340 lowerAssign(Assign); | 2298 lowerAssign(Assign); |
| 2341 return; | 2299 return; |
| 2342 } | 2300 } |
| 2343 switch (Dest->getType()) { | 2301 switch (DestTy) { |
| 2344 default: | 2302 default: |
| 2345 llvm_unreachable("Unexpected Bitcast dest type"); | 2303 llvm_unreachable("Unexpected Bitcast dest type"); |
| 2346 case IceType_i8: { | 2304 case IceType_i8: { |
| 2347 assert(Src0->getType() == IceType_v8i1); | 2305 assert(Src0->getType() == IceType_v8i1); |
| 2348 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); | 2306 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); |
| 2349 Call->addArg(Src0); | 2307 Call->addArg(Src0); |
| 2350 lowerCall(Call); | 2308 lowerCall(Call); |
| 2351 } break; | 2309 } break; |
| 2352 case IceType_i16: { | 2310 case IceType_i16: { |
| 2353 assert(Src0->getType() == IceType_v16i1); | 2311 assert(Src0->getType() == IceType_v16i1); |
| 2354 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); | 2312 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); |
| 2355 Call->addArg(Src0); | 2313 Call->addArg(Src0); |
| 2356 lowerCall(Call); | 2314 lowerCall(Call); |
| 2357 } break; | 2315 } break; |
| 2358 case IceType_i32: | 2316 case IceType_i32: |
| 2359 case IceType_f32: { | 2317 case IceType_f32: { |
| 2360 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2318 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2361 Type DestType = Dest->getType(); | |
| 2362 Type SrcType = Src0RM->getType(); | 2319 Type SrcType = Src0RM->getType(); |
| 2363 (void)DestType; | 2320 assert((DestTy == IceType_i32 && SrcType == IceType_f32) || |
| 2364 assert((DestType == IceType_i32 && SrcType == IceType_f32) || | 2321 (DestTy == IceType_f32 && SrcType == IceType_i32)); |
| 2365 (DestType == IceType_f32 && SrcType == IceType_i32)); | |
| 2366 // a.i32 = bitcast b.f32 ==> | 2322 // a.i32 = bitcast b.f32 ==> |
| 2367 // t.f32 = b.f32 | 2323 // t.f32 = b.f32 |
| 2368 // s.f32 = spill t.f32 | 2324 // s.f32 = spill t.f32 |
| 2369 // a.i32 = s.f32 | 2325 // a.i32 = s.f32 |
| 2370 Variable *T = nullptr; | 2326 Variable *T = nullptr; |
| 2371 // TODO: Should be able to force a spill setup by calling legalize() with | 2327 // TODO: Should be able to force a spill setup by calling legalize() with |
| 2372 // Legal_Mem and not Legal_Reg or Legal_Imm. | 2328 // Legal_Mem and not Legal_Reg or Legal_Imm. |
| 2373 typename Traits::SpillVariable *SpillVar = | 2329 typename Traits::SpillVariable *SpillVar = |
| 2374 Func->makeVariable<typename Traits::SpillVariable>(SrcType); | 2330 Func->makeVariable<typename Traits::SpillVariable>(SrcType); |
| 2375 SpillVar->setLinkedTo(Dest); | 2331 SpillVar->setLinkedTo(Dest); |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2429 if (Traits::Is64Bit) { | 2385 if (Traits::Is64Bit) { |
| 2430 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2386 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2431 Variable *T = makeReg(IceType_f64); | 2387 Variable *T = makeReg(IceType_f64); |
| 2432 // Movd requires its fp argument (in this case, the bitcast | 2388 // Movd requires its fp argument (in this case, the bitcast |
| 2433 // destination) to be an xmm register. | 2389 // destination) to be an xmm register. |
| 2434 _movd(T, Src0RM); | 2390 _movd(T, Src0RM); |
| 2435 _mov(Dest, T); | 2391 _mov(Dest, T); |
| 2436 } else { | 2392 } else { |
| 2437 Src0 = legalize(Src0); | 2393 Src0 = legalize(Src0); |
| 2438 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | 2394 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { |
| 2439 Variable *T = Func->makeVariable(Dest->getType()); | 2395 Variable *T = Func->makeVariable(DestTy); |
| 2440 _movq(T, Src0); | 2396 _movq(T, Src0); |
| 2441 _movq(Dest, T); | 2397 _movq(Dest, T); |
| 2442 break; | 2398 break; |
| 2443 } | 2399 } |
| 2444 // a.f64 = bitcast b.i64 ==> | 2400 // a.f64 = bitcast b.i64 ==> |
| 2445 // t_lo.i32 = b_lo.i32 | 2401 // t_lo.i32 = b_lo.i32 |
| 2446 // FakeDef(s.f64) | 2402 // FakeDef(s.f64) |
| 2447 // lo(s.f64) = t_lo.i32 | 2403 // lo(s.f64) = t_lo.i32 |
| 2448 // t_hi.i32 = b_hi.i32 | 2404 // t_hi.i32 = b_hi.i32 |
| 2449 // hi(s.f64) = t_hi.i32 | 2405 // hi(s.f64) = t_hi.i32 |
| (...skipping 580 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3030 | 2986 |
| 3031 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2987 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
| 3032 InstructionSet >= Traits::SSE4_1) { | 2988 InstructionSet >= Traits::SSE4_1) { |
| 3033 // Use insertps, pinsrb, pinsrw, or pinsrd. | 2989 // Use insertps, pinsrb, pinsrw, or pinsrd. |
| 3034 Operand *ElementRM = | 2990 Operand *ElementRM = |
| 3035 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 2991 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
| 3036 Operand *SourceVectRM = | 2992 Operand *SourceVectRM = |
| 3037 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2993 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3038 Variable *T = makeReg(Ty); | 2994 Variable *T = makeReg(Ty); |
| 3039 _movp(T, SourceVectRM); | 2995 _movp(T, SourceVectRM); |
| 3040 if (Ty == IceType_v4f32) | 2996 if (Ty == IceType_v4f32) { |
| 3041 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | 2997 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); |
| 3042 else | 2998 } else { |
| 3043 // TODO(stichnot): For the pinsrb and pinsrw instructions, when the source | 2999 // For the pinsrb and pinsrw instructions, when the source operand is a |
| 3044 // operand is a register, it must be a full r32 register like eax, and not | 3000 // register, it must be a full r32 register like eax, and not ax/al/ah. |
| 3045 // ax/al/ah. For filetype=asm, InstX86Pinsr<Machine>::emit() compensates | 3001 // For filetype=asm, InstX86Pinsr<Machine>::emit() compensates for the use |
| 3046 // for the use of r16 and r8 by converting them through getBaseReg(), | 3002 // of r16 and r8 by converting them through getBaseReg(), while emitIAS() |
| 3047 // while emitIAS() validates that the original and base register encodings | 3003 // validates that the original and base register encodings are the same. |
| 3048 // are the same. But for an "interior" register like ah, it should | 3004 if (ElementRM->getType() == IceType_i8 && |
| 3049 // probably be copied into an r32 via movzx so that the types work out. | 3005 llvm::isa<Variable>(ElementRM)) { |
| 3006 // Don't use ah/bh/ch/dh for pinsrb. |
| 3007 ElementRM = copyToReg8(ElementRM); |
| 3008 } |
| 3050 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | 3009 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); |
| 3010 } |
| 3051 _movp(Inst->getDest(), T); | 3011 _movp(Inst->getDest(), T); |
| 3052 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 3012 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 3053 // Use shufps or movss. | 3013 // Use shufps or movss. |
| 3054 Variable *ElementR = nullptr; | 3014 Variable *ElementR = nullptr; |
| 3055 Operand *SourceVectRM = | 3015 Operand *SourceVectRM = |
| 3056 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 3016 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 3057 | 3017 |
| 3058 if (InVectorElementTy == IceType_f32) { | 3018 if (InVectorElementTy == IceType_f32) { |
| 3059 // ElementR will be in an XMM register since it is floating point. | 3019 // ElementR will be in an XMM register since it is floating point. |
| 3060 ElementR = legalizeToReg(ElementToInsertNotLegalized); | 3020 ElementR = legalizeToReg(ElementToInsertNotLegalized); |
| (...skipping 2286 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5347 // TODO(wala,stichnot): lea should not | 5307 // TODO(wala,stichnot): lea should not |
| 5348 // be required. The address of the stack slot is known at compile time | 5308 // be required. The address of the stack slot is known at compile time |
| 5349 // (although not until after addProlog()). | 5309 // (although not until after addProlog()). |
| 5350 constexpr Type PointerType = IceType_i32; | 5310 constexpr Type PointerType = IceType_i32; |
| 5351 Variable *Loc = makeReg(PointerType); | 5311 Variable *Loc = makeReg(PointerType); |
| 5352 _lea(Loc, Slot); | 5312 _lea(Loc, Slot); |
| 5353 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5313 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5354 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 5314 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
| 5355 } | 5315 } |
| 5356 | 5316 |
| 5317 /// Lowering helper to copy a scalar integer source operand into some 8-bit GPR. |
| 5318 /// Src is assumed to already be legalized. If the source operand is known to |
| 5319 /// be a memory or immediate operand, a simple mov will suffice. But if the |
| 5320 /// source operand can be a physical register, then it must first be copied into |
| 5321 /// a physical register that is truncable to 8-bit, then truncated into a |
| 5322 /// physical register that can receive a truncation, and finally copied into the |
| 5323 /// result 8-bit register (which in general can be any 8-bit register). For |
| 5324 /// example, moving %ebp into %ah may be accomplished as: |
| 5325 /// movl %ebp, %edx |
| 5326 /// mov_trunc %edx, %dl // this redundant assignment is ultimately elided |
| 5327 /// movb %dl, %ah |
| 5328 /// On the other hand, moving a memory or immediate operand into ah: |
| 5329 /// movb 4(%ebp), %ah |
| 5330 /// movb $my_imm, %ah |
| 5331 /// |
| 5332 /// Note #1. On a 64-bit target, the "movb 4(%ebp), %ah" is likely not |
| 5333 /// encodable, so RegNum=Reg_ah should NOT be given as an argument. Instead, |
| 5334 /// use RegNum=NoRegister and then let the caller do a separate copy into |
| 5335 /// Reg_ah. |
| 5336 /// |
| 5337 /// Note #2. ConstantRelocatable operands are also put through this process |
| 5338 /// (not truncated directly) because our ELF emitter does R_386_32 relocations |
| 5339 /// but not R_386_8 relocations. |
| 5340 /// |
| 5341 /// Note #3. If Src is a Variable, the result will be an infinite-weight i8 |
| 5342 /// Variable with the RCX86_IsTrunc8Rcvr register class. As such, this helper |
| 5343 /// is a convenient way to prevent ah/bh/ch/dh from being an (invalid) argument |
| 5344 /// to the pinsrb instruction. |
| 5345 template <class Machine> |
| 5346 Variable *TargetX86Base<Machine>::copyToReg8(Operand *Src, int32_t RegNum) { |
| 5347 Type Ty = Src->getType(); |
| 5348 assert(isScalarIntegerType(Ty)); /* only scalar integer sources are accepted */ |
| 5349 assert(Ty != IceType_i1); /* i1 sources are rejected */ |
| 5350 Variable *Reg = makeReg(IceType_i8, RegNum); /* the i8 result; RegNum is forwarded to makeReg unchanged */ |
| 5351 Reg->setRegClass(RCX86_IsTrunc8Rcvr); |
| 5352 if (llvm::isa<Variable>(Src) || llvm::isa<ConstantRelocatable>(Src)) { /* these two operand kinds take the staged copy below */ |
| 5353 Variable *SrcTruncable = makeReg(Ty); /* staging register that keeps the source's own type */ |
| 5354 switch (Ty) { /* choose the staging register class from the source width */ |
| 5355 case IceType_i64: |
| 5356 SrcTruncable->setRegClass(RCX86_Is64To8); |
| 5357 break; |
| 5358 case IceType_i32: |
| 5359 SrcTruncable->setRegClass(RCX86_Is32To8); |
| 5360 break; |
| 5361 case IceType_i16: |
| 5362 SrcTruncable->setRegClass(RCX86_Is16To8); |
| 5363 break; |
| 5364 default: |
| 5365 // i8 - just use default register class |
| 5366 break; |
| 5367 } |
| 5368 Variable *SrcRcvr = makeReg(IceType_i8); /* i8 register that receives the truncated value */ |
| 5369 SrcRcvr->setRegClass(RCX86_IsTrunc8Rcvr); |
| 5370 _mov(SrcTruncable, Src); /* same-type copy into the staging register */ |
| 5371 _mov(SrcRcvr, SrcTruncable); /* corresponds to the "mov_trunc" step of the header example */ |
| 5372 Src = SrcRcvr; /* the final mov below now reads the staged i8 value */ |
| 5373 } |
| 5374 _mov(Reg, Src); /* sources that skipped the staging step are moved directly */ |
| 5375 return Reg; |
| 5376 } |
| 5377 |
| 5357 /// Helper for legalize() to emit the right code to lower an operand to a | 5378 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5358 /// register of the appropriate type. | 5379 /// register of the appropriate type. |
| 5359 template <class Machine> | 5380 template <class Machine> |
| 5360 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5381 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| 5361 Type Ty = Src->getType(); | 5382 Type Ty = Src->getType(); |
| 5362 Variable *Reg = makeReg(Ty, RegNum); | 5383 Variable *Reg = makeReg(Ty, RegNum); |
| 5363 if (isVectorType(Ty)) { | 5384 if (isVectorType(Ty)) { |
| 5364 _movp(Reg, Src); | 5385 _movp(Reg, Src); |
| 5365 } else { | 5386 } else { |
| 5366 _mov(Reg, Src); | 5387 _mov(Reg, Src); |
| (...skipping 493 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5860 } | 5881 } |
| 5861 // the offset is not eligible for blinding or pooling, return the original | 5882 // the offset is not eligible for blinding or pooling, return the original |
| 5862 // mem operand | 5883 // mem operand |
| 5863 return MemOperand; | 5884 return MemOperand; |
| 5864 } | 5885 } |
| 5865 | 5886 |
| 5866 } // end of namespace X86Internal | 5887 } // end of namespace X86Internal |
| 5867 } // end of namespace Ice | 5888 } // end of namespace Ice |
| 5868 | 5889 |
| 5869 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5890 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |