Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(181)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
// Emits a runtime divide-by-zero check for the (possibly 64-bit) divisor
// given by SrcLo/SrcHi: if the divisor is zero, execution traps (ARM division
// does not trap by itself, but NaCl requires a trap). The check is elided
// when either half is statically known to be nonzero.
void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm::report_fatal_error("Unexpected type");
  case IceType_i8:
  // i8 deliberately falls through to i16: both shift the value's significant
  // bits to the top of a 32-bit register with lsls, setting Z iff they are
  // all zero (shift amount = 32 - bit width, i.e. 24 for i8, 16 for i16).
  case IceType_i16: {
    Operand *ShAmtF =
        legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
                 Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _lsls(T, SrcLoReg, ShAmtF);
    Context.insert(InstFakeUse::create(Func, T));
  } break;
  case IceType_i32: {
    // tst r, r sets Z iff r == 0.
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *T = makeReg(IceType_i32);
    _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
    // T isn't going to be used, but we need the side-effect of setting flags
    // from this operation.
    Context.insert(InstFakeUse::create(Func, T));
  }
  }
  // Branch around the trap when the flags say "nonzero" (NE).
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
namespace {
// NumericOperands is used during arithmetic/icmp lowering for constant folding.
// It holds the two source operands, and maintains some state as to whether one
// of them is a constant. If one of the operands is a constant, then it will be
// stored as the operation's second source, with a bit indicating whether the
// operands were swapped.
//
// The class is split into a base class with operand type-independent methods,
// and a derived, templated class, for each type of operand we want to fold
// constants for:
//
// NumericOperandsBase --> NumericOperands<ConstantFloat>
//                     --> NumericOperands<ConstantDouble>
//                     --> NumericOperands<ConstantInt32>
//
// NumericOperands<ConstantInt32> also exposes helper methods for emitting
// inverted/negated immediates.
class NumericOperandsBase {
  NumericOperandsBase() = delete;
  NumericOperandsBase(const NumericOperandsBase &) = delete;
  NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;

public:
  NumericOperandsBase(Operand *S0, Operand *S1)
      : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
        Swapped(Src0 == S1 && S0 != S1) {
    assert(Src0 != nullptr);
    assert(Src1 != nullptr);
    assert(Src0 != Src1 || S0 == S1);
  }

  // True when Src1 is a foldable constant (relocatables are excluded because
  // their value is not known until link time).
  bool hasConstOperand() const {
    return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
  }

  // True when the constant was originally the *first* source operand.
  bool swappedOperands() const { return Swapped; }

  // src0R/src1RF operate on the canonicalized (possibly swapped) order;
  // the unswapped* variants recover the original operand order.
  Variable *src0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Src0);
  }

  Variable *unswappedSrc0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src1 : Src0);
  }

  Operand *src1RF(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target, Src1);
  }

  Variable *unswappedSrc1R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src0 : Src1);
  }

  Operand *unswappedSrc1RF(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1);
  }

protected:
  Operand *const Src0;
  Operand *const Src1;
  const bool Swapped;

  static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
    return Target->legalizeToReg(Src);
  }

  static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
    return Target->legalize(Src,
                            TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
  }

private:
  // Picks the operand to keep in the first position: prefer a non-constant;
  // among two constants, prefer keeping a relocatable (non-foldable) first.
  static Operand *NonConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S0;
    if (!llvm::isa<Constant>(S1))
      return S1;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S1;
    return S0;
  }

  // Picks the operand to place in the second (constant) position; mirror
  // image of NonConstOperand so the two selections are always consistent.
  static Operand *ConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S1;
    if (!llvm::isa<Constant>(S1))
      return S0;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S0;
    return S1;
  }
};

template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  // Only valid when hasConstOperand() is true (enforced by the llvm::cast).
  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};

using FloatOperands = NumericOperands<ConstantFloat>;
using DoubleOperands = NumericOperands<ConstantDouble>;

// Int32Operands adds queries for whether the (possibly negated or inverted)
// immediate fits an ARM flexible-second-operand encoding, plus helpers that
// materialize those derived immediates.
class Int32Operands : public NumericOperands<ConstantInteger32> {
  Int32Operands() = delete;
  Int32Operands(const Int32Operands &) = delete;
  Int32Operands &operator=(const Int32Operands &) = delete;

public:
  Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}

  bool immediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
  }

  bool negatedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *negatedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   -static_cast<int32_t>(getConstantValue())));
  }

  bool invertedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *invertedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   ~static_cast<uint32_t>(getConstantValue())));
  }
};
} // end of anonymous namespace
1555
1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1557 Variable *Dest, Operand *Src0,
1558 Operand *Src1) {
1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
1563
1564 // These helper-call-involved instructions are lowered in this separate
1565 // switch. This is because we would otherwise assume that we need to
1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1567 // helper calls, and such unused/redundant instructions will fail liveness
1568 // analysis under -Om1 setting.
1569 switch (Op) {
1570 default:
1571 break;
1572 case InstArithmetic::Udiv:
1573 case InstArithmetic::Sdiv:
1574 case InstArithmetic::Urem:
1575 case InstArithmetic::Srem: {
1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1578 // register, which will hide a constant source operand. Instead, check
1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) {
1582 _trap();
1583 return;
1584 }
1585 } else {
1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this);
1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this);
1588 div0Check(IceType_i64, Src1Lo, Src1Hi);
1589 }
1590 // Technically, ARM has its own aeabi routines, but we can use the
1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1592 // the more standard __moddi3 for rem.
1593 const char *HelperName = "";
1594 switch (Op) {
1595 default:
1596 llvm::report_fatal_error("Should have only matched div ops.");
1597 break;
1598 case InstArithmetic::Udiv:
1599 HelperName = H_udiv_i64;
1600 break;
1601 case InstArithmetic::Sdiv:
1602 HelperName = H_sdiv_i64;
1603 break;
1604 case InstArithmetic::Urem:
1605 HelperName = H_urem_i64;
1606 break;
1607 case InstArithmetic::Srem:
1608 HelperName = H_srem_i64;
1609 break;
1610 }
1611 constexpr SizeT MaxSrcs = 2;
1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1613 Call->addArg(Src0);
1614 Call->addArg(Src1);
1615 lowerCall(Call);
1616 return;
1617 }
1618 }
1619
1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1622 Variable *T_Lo = makeReg(DestLo->getType());
1623 Variable *T_Hi = makeReg(DestHi->getType());
1624
1625 switch (Op) {
1626 case InstArithmetic::_num:
1627 llvm::report_fatal_error("Unknown arithmetic operator");
1628 return;
1629 case InstArithmetic::Add: {
1630 Variable *Src0LoR = SrcsLo.src0R(this);
1631 Operand *Src1LoRF = SrcsLo.src1RF(this);
1632 Variable *Src0HiR = SrcsHi.src0R(this);
1633 Operand *Src1HiRF = SrcsHi.src1RF(this);
1634 _adds(T_Lo, Src0LoR, Src1LoRF);
1635 _mov(DestLo, T_Lo);
1636 _adc(T_Hi, Src0HiR, Src1HiRF);
1637 _mov(DestHi, T_Hi);
1638 return;
1639 }
1640 case InstArithmetic::And: {
1641 Variable *Src0LoR = SrcsLo.src0R(this);
1642 Operand *Src1LoRF = SrcsLo.src1RF(this);
1643 Variable *Src0HiR = SrcsHi.src0R(this);
1644 Operand *Src1HiRF = SrcsHi.src1RF(this);
1645 _and(T_Lo, Src0LoR, Src1LoRF);
1646 _mov(DestLo, T_Lo);
1647 _and(T_Hi, Src0HiR, Src1HiRF);
1648 _mov(DestHi, T_Hi);
1649 return;
1650 }
1651 case InstArithmetic::Or: {
1652 Variable *Src0LoR = SrcsLo.src0R(this);
1653 Operand *Src1LoRF = SrcsLo.src1RF(this);
1654 Variable *Src0HiR = SrcsHi.src0R(this);
1655 Operand *Src1HiRF = SrcsHi.src1RF(this);
1656 _orr(T_Lo, Src0LoR, Src1LoRF);
1657 _mov(DestLo, T_Lo);
1658 _orr(T_Hi, Src0HiR, Src1HiRF);
1659 _mov(DestHi, T_Hi);
1660 return;
1661 }
1662 case InstArithmetic::Xor: {
1663 Variable *Src0LoR = SrcsLo.src0R(this);
1664 Operand *Src1LoRF = SrcsLo.src1RF(this);
1665 Variable *Src0HiR = SrcsHi.src0R(this);
1666 Operand *Src1HiRF = SrcsHi.src1RF(this);
1667 _eor(T_Lo, Src0LoR, Src1LoRF);
1668 _mov(DestLo, T_Lo);
1669 _eor(T_Hi, Src0HiR, Src1HiRF);
1670 _mov(DestHi, T_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Sub: {
1674 Variable *Src0LoR = SrcsLo.src0R(this);
1675 Operand *Src1LoRF = SrcsLo.src1RF(this);
1676 Variable *Src0HiR = SrcsHi.src0R(this);
1677 Operand *Src1HiRF = SrcsHi.src1RF(this);
1678 if (SrcsLo.swappedOperands()) {
1679 _rsbs(T_Lo, Src0LoR, Src1LoRF);
1680 _mov(DestLo, T_Lo);
1681 _rsc(T_Hi, Src0HiR, Src1HiRF);
1682 _mov(DestHi, T_Hi);
1683 } else {
1684 _subs(T_Lo, Src0LoR, Src1LoRF);
1685 _mov(DestLo, T_Lo);
1686 _sbc(T_Hi, Src0HiR, Src1HiRF);
1687 _mov(DestHi, T_Hi);
1688 }
1689 return;
1690 }
1691 case InstArithmetic::Mul: {
1692 // GCC 4.8 does:
1693 // a=b*c ==>
1694 // t_acc =(mul) (b.lo * c.hi)
1695 // t_acc =(mla) (c.lo * b.hi) + t_acc
1696 // t.hi,t.lo =(umull) b.lo * c.lo
1697 // t.hi += t_acc
1698 // a.lo = t.lo
1699 // a.hi = t.hi
1700 //
1701 // LLVM does:
1702 // t.hi,t.lo =(umull) b.lo * c.lo
1703 // t.hi =(mla) (b.lo * c.hi) + t.hi
1704 // t.hi =(mla) (b.hi * c.lo) + t.hi
1705 // a.lo = t.lo
1706 // a.hi = t.hi
1707 //
1708 // LLVM's lowering has fewer instructions, but more register pressure:
1709 // t.lo is live from beginning to end, while GCC delays the two-dest
1710 // instruction till the end, and kills c.hi immediately.
1711 Variable *T_Acc = makeReg(IceType_i32);
1712 Variable *T_Acc1 = makeReg(IceType_i32);
1713 Variable *T_Hi1 = makeReg(IceType_i32);
1714 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1715 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1716 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1717 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
1718 _mul(T_Acc, Src0RLo, Src1RHi);
1719 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1720 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1721 _add(T_Hi, T_Hi1, T_Acc1);
1722 _mov(DestLo, T_Lo);
1723 _mov(DestHi, T_Hi);
1724 return;
1725 }
1726 case InstArithmetic::Shl: {
1727 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1728 Variable *Src0RLo = SrcsLo.src0R(this);
1729 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1730 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1731 if (ShAmtImm == 0) {
1732 _mov(DestLo, Src0RLo);
1733 _mov(DestHi, SrcsHi.src0R(this));
1734 return;
1735 }
1736
1737 if (ShAmtImm >= 32) {
1738 if (ShAmtImm == 32) {
1739 _mov(DestHi, Src0RLo);
1740 } else {
1741 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1742 Legal_Reg | Legal_Flex);
1743 _lsl(T_Hi, Src0RLo, ShAmtOp);
1744 _mov(DestHi, T_Hi);
1745 }
1746
1747 Operand *_0 =
1748 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1749 _mov(T_Lo, _0);
1750 _mov(DestLo, T_Lo);
1751 return;
1752 }
1753
1754 Variable *Src0RHi = SrcsHi.src0R(this);
1755 Operand *ShAmtOp =
1756 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1757 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1758 Legal_Reg | Legal_Flex);
1759 _lsl(T_Hi, Src0RHi, ShAmtOp);
1760 _orr(T_Hi, T_Hi,
1761 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1762 OperandARM32::LSR, ComplShAmtOp));
1763 _mov(DestHi, T_Hi);
1764
1765 _lsl(T_Lo, Src0RLo, ShAmtOp);
1766 _mov(DestLo, T_Lo);
1767 return;
1768 }
1769
1770 // a=b<<c ==>
1771 // pnacl-llc does:
1772 // mov t_b.lo, b.lo
1773 // mov t_b.hi, b.hi
1774 // mov t_c.lo, c.lo
1775 // rsb T0, t_c.lo, #32
1776 // lsr T1, t_b.lo, T0
1777 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1778 // sub T2, t_c.lo, #32
1779 // cmp T2, #0
1780 // lslge t_a.hi, t_b.lo, T2
1781 // lsl t_a.lo, t_b.lo, t_c.lo
1782 // mov a.lo, t_a.lo
1783 // mov a.hi, t_a.hi
1784 //
1785 // GCC 4.8 does:
1786 // sub t_c1, c.lo, #32
1787 // lsl t_hi, b.hi, c.lo
1788 // orr t_hi, t_hi, b.lo, lsl t_c1
1789 // rsb t_c2, c.lo, #32
1790 // orr t_hi, t_hi, b.lo, lsr t_c2
1791 // lsl t_lo, b.lo, c.lo
1792 // a.lo = t_lo
1793 // a.hi = t_hi
1794 //
1795 // These are incompatible, therefore we mimic pnacl-llc.
1796 // Can be strength-reduced for constant-shifts, but we don't do that for
1797 // now.
1798 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1799 // ARM, shifts only take the lower 8 bits of the shift register, and
1800 // saturate to the range 0-32, so the negative value will saturate to 32.
1801 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1802 Operand *_0 =
1803 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1804 Variable *T0 = makeReg(IceType_i32);
1805 Variable *T1 = makeReg(IceType_i32);
1806 Variable *T2 = makeReg(IceType_i32);
1807 Variable *TA_Hi = makeReg(IceType_i32);
1808 Variable *TA_Lo = makeReg(IceType_i32);
1809 Variable *Src0RLo = SrcsLo.src0R(this);
1810 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1811 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1812 _rsb(T0, Src1RLo, _32);
1813 _lsr(T1, Src0RLo, T0);
1814 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1815 OperandARM32::LSL, Src1RLo));
1816 _sub(T2, Src1RLo, _32);
1817 _cmp(T2, _0);
1818 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1819 _set_dest_redefined();
1820 _lsl(TA_Lo, Src0RLo, Src1RLo);
1821 _mov(DestLo, TA_Lo);
1822 _mov(DestHi, TA_Hi);
1823 return;
1824 }
1825 case InstArithmetic::Lshr:
1826 case InstArithmetic::Ashr: {
1827 const bool ASR = Op == InstArithmetic::Ashr;
1828 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1829 Variable *Src0RHi = SrcsHi.src0R(this);
1830 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1831 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1832 if (ShAmtImm == 0) {
1833 _mov(DestHi, Src0RHi);
1834 _mov(DestLo, SrcsLo.src0R(this));
1835 return;
1836 }
1837
1838 if (ShAmtImm >= 32) {
1839 if (ShAmtImm == 32) {
1840 _mov(DestLo, Src0RHi);
1841 } else {
1842 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1843 Legal_Reg | Legal_Flex);
1844 if (ASR) {
1845 _asr(T_Lo, Src0RHi, ShAmtOp);
1846 } else {
1847 _lsr(T_Lo, Src0RHi, ShAmtOp);
1848 }
1849 _mov(DestLo, T_Lo);
1850 }
1851
1852 if (ASR) {
1853 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1854 Legal_Reg | Legal_Flex);
1855 _asr(T_Hi, Src0RHi, _31);
1856 } else {
1857 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1858 Legal_Reg | Legal_Flex);
1859 _mov(T_Hi, _0);
1860 }
1861 _mov(DestHi, T_Hi);
1862 return;
1863 }
1864
1865 Variable *Src0RLo = SrcsLo.src0R(this);
1866 Operand *ShAmtOp =
1867 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1868 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1869 Legal_Reg | Legal_Flex);
1870 _lsr(T_Lo, Src0RLo, ShAmtOp);
1871 _orr(T_Lo, T_Lo,
1872 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1873 OperandARM32::LSL, ComplShAmtOp));
1874 _mov(DestLo, T_Lo);
1875
1876 if (ASR) {
1877 _asr(T_Hi, Src0RHi, ShAmtOp);
1878 } else {
1879 _lsr(T_Hi, Src0RHi, ShAmtOp);
1880 }
1881 _mov(DestHi, T_Hi);
1882 return;
1883 }
1884
1885 // a=b>>c
1886 // pnacl-llc does:
1887 // mov t_b.lo, b.lo
1888 // mov t_b.hi, b.hi
1889 // mov t_c.lo, c.lo
1890 // lsr T0, t_b.lo, t_c.lo
1891 // rsb T1, t_c.lo, #32
1892 // orr t_a.lo, T0, t_b.hi, lsl T1
1893 // sub T2, t_c.lo, #32
1894 // cmp T2, #0
1895 // [al]srge t_a.lo, t_b.hi, T2
1896 // [al]sr t_a.hi, t_b.hi, t_c.lo
1897 // mov a.lo, t_a.lo
1898 // mov a.hi, t_a.hi
1899 //
1900 // GCC 4.8 does (lsr):
1901 // rsb t_c1, c.lo, #32
1902 // lsr t_lo, b.lo, c.lo
1903 // orr t_lo, t_lo, b.hi, lsl t_c1
1904 // sub t_c2, c.lo, #32
1905 // orr t_lo, t_lo, b.hi, lsr t_c2
1906 // lsr t_hi, b.hi, c.lo
1907 // mov a.lo, t_lo
1908 // mov a.hi, t_hi
1909 //
1910 // These are incompatible, therefore we mimic pnacl-llc.
1911 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1912 Operand *_0 =
1913 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1914 Variable *T0 = makeReg(IceType_i32);
1915 Variable *T1 = makeReg(IceType_i32);
1916 Variable *T2 = makeReg(IceType_i32);
1917 Variable *TA_Lo = makeReg(IceType_i32);
1918 Variable *TA_Hi = makeReg(IceType_i32);
1919 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1920 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1921 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1922 _lsr(T0, Src0RLo, Src1RLo);
1923 _rsb(T1, Src1RLo, _32);
1924 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1925 OperandARM32::LSL, T1));
1926 _sub(T2, Src1RLo, _32);
1927 _cmp(T2, _0);
1928 if (ASR) {
1929 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1930 _set_dest_redefined();
1931 _asr(TA_Hi, Src0RHi, Src1RLo);
1932 } else {
1933 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1934 _set_dest_redefined();
1935 _lsr(TA_Hi, Src0RHi, Src1RLo);
1936 }
1937 _mov(DestLo, TA_Lo);
1938 _mov(DestHi, TA_Hi);
1939 return;
1940 }
1941 case InstArithmetic::Fadd:
1942 case InstArithmetic::Fsub:
1943 case InstArithmetic::Fmul:
1944 case InstArithmetic::Fdiv:
1945 case InstArithmetic::Frem:
1946 llvm::report_fatal_error("FP instruction with i64 type");
1947 return;
1948 case InstArithmetic::Udiv:
1949 case InstArithmetic::Sdiv:
1950 case InstArithmetic::Urem:
1951 case InstArithmetic::Srem:
1952 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1953 "should have already been handled before");
1954 return;
1955 }
1956 }
1957
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1958 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1959 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1960 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1961 lowerInt1Arithmetic(Inst);
1411 return; 1962 return;
1412 } 1963 }
1413 1964
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1965 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1966 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1967 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1968 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to 1969 return;
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with 1970 }
1427 // helper calls, and such unused/redundant instructions will fail liveness 1971
1428 // analysis under -Om1 setting. 1972 if (isVectorType(Dest->getType())) {
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return;
1689 } else if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1973 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1974 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1975 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1976 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1977 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1978 return;
1696 } 1979 }
1980
1697 // Dest->getType() is a non-i64 scalar. 1981 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1982 Variable *T = makeReg(Dest->getType());
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1983
1984 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1985 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1986 // difficult to determine (constant may be moved to a register).
1987 // * Handle floating point arithmetic separately: they require Src1 to be
1988 // legalized to a register.
1703 switch (Inst->getOp()) { 1989 switch (Inst->getOp()) {
1704 default: 1990 default:
1705 break; 1991 break;
1706 case InstArithmetic::Udiv: { 1992 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1993 constexpr bool NotRemainder = false;
1994 Variable *Src0R = legalizeToReg(Src0);
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1995 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1996 H_udiv_i32, NotRemainder);
1710 return; 1997 return;
1711 } 1998 }
1712 case InstArithmetic::Sdiv: { 1999 case InstArithmetic::Sdiv: {
1713 constexpr bool NotRemainder = false; 2000 constexpr bool NotRemainder = false;
2001 Variable *Src0R = legalizeToReg(Src0);
1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 2002 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1715 H_sdiv_i32, NotRemainder); 2003 H_sdiv_i32, NotRemainder);
1716 return; 2004 return;
1717 } 2005 }
1718 case InstArithmetic::Urem: { 2006 case InstArithmetic::Urem: {
1719 constexpr bool IsRemainder = true; 2007 constexpr bool IsRemainder = true;
2008 Variable *Src0R = legalizeToReg(Src0);
1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 2009 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1721 H_urem_i32, IsRemainder); 2010 H_urem_i32, IsRemainder);
1722 return; 2011 return;
1723 } 2012 }
1724 case InstArithmetic::Srem: { 2013 case InstArithmetic::Srem: {
1725 constexpr bool IsRemainder = true; 2014 constexpr bool IsRemainder = true;
2015 Variable *Src0R = legalizeToReg(Src0);
1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 2016 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1727 H_srem_i32, IsRemainder); 2017 H_srem_i32, IsRemainder);
1728 return; 2018 return;
1729 } 2019 }
1730 case InstArithmetic::Frem: { 2020 case InstArithmetic::Frem: {
1731 const SizeT MaxSrcs = 2; 2021 constexpr SizeT MaxSrcs = 2;
2022 Variable *Src0R = legalizeToReg(Src0);
1732 Type Ty = Dest->getType(); 2023 Type Ty = Dest->getType();
1733 InstCall *Call = makeHelperCall( 2024 InstCall *Call = makeHelperCall(
1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2025 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1735 Call->addArg(Src0R); 2026 Call->addArg(Src0R);
1736 Call->addArg(Src1); 2027 Call->addArg(Src1);
1737 lowerCall(Call); 2028 lowerCall(Call);
1738 return; 2029 return;
1739 } 2030 }
1740 }
1741
1742 // Handle floating point arithmetic separately: they require Src1 to be
1743 // legalized to a register.
1744 switch (Inst->getOp()) {
1745 default:
1746 break;
1747 case InstArithmetic::Fadd: { 2031 case InstArithmetic::Fadd: {
2032 Variable *Src0R = legalizeToReg(Src0);
1748 Variable *Src1R = legalizeToReg(Src1); 2033 Variable *Src1R = legalizeToReg(Src1);
1749 _vadd(T, Src0R, Src1R); 2034 _vadd(T, Src0R, Src1R);
1750 _mov(Dest, T); 2035 _mov(Dest, T);
1751 return; 2036 return;
1752 } 2037 }
1753 case InstArithmetic::Fsub: { 2038 case InstArithmetic::Fsub: {
2039 Variable *Src0R = legalizeToReg(Src0);
1754 Variable *Src1R = legalizeToReg(Src1); 2040 Variable *Src1R = legalizeToReg(Src1);
1755 _vsub(T, Src0R, Src1R); 2041 _vsub(T, Src0R, Src1R);
1756 _mov(Dest, T); 2042 _mov(Dest, T);
1757 return; 2043 return;
1758 } 2044 }
1759 case InstArithmetic::Fmul: { 2045 case InstArithmetic::Fmul: {
2046 Variable *Src0R = legalizeToReg(Src0);
1760 Variable *Src1R = legalizeToReg(Src1); 2047 Variable *Src1R = legalizeToReg(Src1);
1761 _vmul(T, Src0R, Src1R); 2048 _vmul(T, Src0R, Src1R);
1762 _mov(Dest, T); 2049 _mov(Dest, T);
1763 return; 2050 return;
1764 } 2051 }
1765 case InstArithmetic::Fdiv: { 2052 case InstArithmetic::Fdiv: {
2053 Variable *Src0R = legalizeToReg(Src0);
1766 Variable *Src1R = legalizeToReg(Src1); 2054 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 2055 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 2056 _mov(Dest, T);
1769 return; 2057 return;
1770 } 2058 }
1771 } 2059 }
1772 2060
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2061 // Handle everything else here.
2062 Int32Operands Srcs(Src0, Src1);
1774 switch (Inst->getOp()) { 2063 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 2064 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 2065 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 2066 return;
1778 case InstArithmetic::Add: 2067 case InstArithmetic::Add: {
2068 if (Srcs.hasConstOperand()) {
2069 if (!Srcs.immediateIsFlexEncodable() &&
2070 Srcs.negatedImmediateIsFlexEncodable()) {
2071 Variable *Src0R = Srcs.src0R(this);
2072 Operand *Src1F = Srcs.negatedSrc1F(this);
2073 if (!Srcs.swappedOperands()) {
2074 _sub(T, Src0R, Src1F);
2075 } else {
2076 _rsb(T, Src0R, Src1F);
2077 }
2078 _mov(Dest, T);
2079 return;
2080 }
2081 }
2082 Variable *Src0R = Srcs.src0R(this);
2083 Operand *Src1RF = Srcs.src1RF(this);
1779 _add(T, Src0R, Src1RF); 2084 _add(T, Src0R, Src1RF);
1780 _mov(Dest, T); 2085 _mov(Dest, T);
1781 return; 2086 return;
1782 case InstArithmetic::And: 2087 }
2088 case InstArithmetic::And: {
2089 if (Srcs.hasConstOperand()) {
2090 if (!Srcs.immediateIsFlexEncodable() &&
2091 Srcs.invertedImmediateIsFlexEncodable()) {
2092 Variable *Src0R = Srcs.src0R(this);
2093 Operand *Src1F = Srcs.invertedSrc1F(this);
2094 _bic(T, Src0R, Src1F);
2095 _mov(Dest, T);
2096 return;
2097 }
2098 }
2099 Variable *Src0R = Srcs.src0R(this);
2100 Operand *Src1RF = Srcs.src1RF(this);
1783 _and(T, Src0R, Src1RF); 2101 _and(T, Src0R, Src1RF);
1784 _mov(Dest, T); 2102 _mov(Dest, T);
1785 return; 2103 return;
1786 case InstArithmetic::Or: 2104 }
2105 case InstArithmetic::Or: {
2106 Variable *Src0R = Srcs.src0R(this);
2107 Operand *Src1RF = Srcs.src1RF(this);
1787 _orr(T, Src0R, Src1RF); 2108 _orr(T, Src0R, Src1RF);
1788 _mov(Dest, T); 2109 _mov(Dest, T);
1789 return; 2110 return;
1790 case InstArithmetic::Xor: 2111 }
2112 case InstArithmetic::Xor: {
2113 Variable *Src0R = Srcs.src0R(this);
2114 Operand *Src1RF = Srcs.src1RF(this);
1791 _eor(T, Src0R, Src1RF); 2115 _eor(T, Src0R, Src1RF);
1792 _mov(Dest, T); 2116 _mov(Dest, T);
1793 return; 2117 return;
1794 case InstArithmetic::Sub: 2118 }
1795 _sub(T, Src0R, Src1RF); 2119 case InstArithmetic::Sub: {
2120 if (Srcs.hasConstOperand()) {
2121 Variable *Src0R = Srcs.src0R(this);
2122 if (Srcs.immediateIsFlexEncodable()) {
2123 Operand *Src1RF = Srcs.src1RF(this);
2124 if (Srcs.swappedOperands()) {
2125 _rsb(T, Src0R, Src1RF);
2126 } else {
2127 _sub(T, Src0R, Src1RF);
2128 }
2129 _mov(Dest, T);
2130 return;
2131 }
2132 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2133 Operand *Src1F = Srcs.negatedSrc1F(this);
2134 _add(T, Src0R, Src1F);
2135 _mov(Dest, T);
2136 return;
2137 }
2138 }
2139 Variable *Src0R = Srcs.unswappedSrc0R(this);
2140 Variable *Src1R = Srcs.unswappedSrc1R(this);
2141 _sub(T, Src0R, Src1R);
1796 _mov(Dest, T); 2142 _mov(Dest, T);
1797 return; 2143 return;
2144 }
1798 case InstArithmetic::Mul: { 2145 case InstArithmetic::Mul: {
1799 Variable *Src1R = legalizeToReg(Src1RF); 2146 Variable *Src0R = Srcs.unswappedSrc0R(this);
2147 Variable *Src1R = Srcs.unswappedSrc1R(this);
1800 _mul(T, Src0R, Src1R); 2148 _mul(T, Src0R, Src1R);
1801 _mov(Dest, T); 2149 _mov(Dest, T);
1802 return; 2150 return;
1803 } 2151 }
1804 case InstArithmetic::Shl: 2152 case InstArithmetic::Shl: {
1805 _lsl(T, Src0R, Src1RF); 2153 Variable *Src0R = Srcs.unswappedSrc0R(this);
2154 Operand *Src1R = Srcs.unswappedSrc1RF(this);
2155 _lsl(T, Src0R, Src1R);
1806 _mov(Dest, T); 2156 _mov(Dest, T);
1807 return; 2157 return;
1808 case InstArithmetic::Lshr: 2158 }
2159 case InstArithmetic::Lshr: {
2160 Variable *Src0R = Srcs.unswappedSrc0R(this);
1809 if (Dest->getType() != IceType_i32) { 2161 if (Dest->getType() != IceType_i32) {
1810 _uxt(Src0R, Src0R); 2162 _uxt(Src0R, Src0R);
1811 } 2163 }
1812 _lsr(T, Src0R, Src1RF); 2164 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this));
1813 _mov(Dest, T); 2165 _mov(Dest, T);
1814 return; 2166 return;
1815 case InstArithmetic::Ashr: 2167 }
2168 case InstArithmetic::Ashr: {
2169 Variable *Src0R = Srcs.unswappedSrc0R(this);
1816 if (Dest->getType() != IceType_i32) { 2170 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 2171 _sxt(Src0R, Src0R);
1818 } 2172 }
1819 _asr(T, Src0R, Src1RF); 2173 _asr(T, Src0R, Srcs.unswappedSrc1RF(this));
1820 _mov(Dest, T); 2174 _mov(Dest, T);
1821 return; 2175 return;
2176 }
1822 case InstArithmetic::Udiv: 2177 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 2178 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 2179 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 2180 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 2181 llvm::report_fatal_error(
2182 "Integer div/rem should have been handled earlier.");
1827 return; 2183 return;
1828 case InstArithmetic::Fadd: 2184 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 2185 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 2186 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 2187 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 2188 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 2189 llvm::report_fatal_error(
2190 "Floating point arith should have been handled earlier.");
1834 return; 2191 return;
1835 } 2192 }
1836 } 2193 }
1837 2194
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { 2195 void TargetARM32::lowerAssign(const InstAssign *Inst) {
1839 Variable *Dest = Inst->getDest(); 2196 Variable *Dest = Inst->getDest();
1840 Operand *Src0 = Inst->getSrc(0); 2197 Operand *Src0 = Inst->getSrc(0);
1841 assert(Dest->getType() == Src0->getType()); 2198 assert(Dest->getType() == Src0->getType());
1842 if (Dest->getType() == IceType_i64) { 2199 if (Dest->getType() == IceType_i64) {
1843 Src0 = legalizeUndef(Src0); 2200 Src0 = legalizeUndef(Src0);
2201
2202 Variable *T_Lo = makeReg(IceType_i32);
2203 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2204 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1848 Variable *T_Lo = makeReg(IceType_i32);
1849 Variable *T_Hi = makeReg(IceType_i32);
1850
1851 _mov(T_Lo, Src0Lo); 2205 _mov(T_Lo, Src0Lo);
1852 _mov(DestLo, T_Lo); 2206 _mov(DestLo, T_Lo);
2207
2208 Variable *T_Hi = makeReg(IceType_i32);
2209 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2210 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1853 _mov(T_Hi, Src0Hi); 2211 _mov(T_Hi, Src0Hi);
1854 _mov(DestHi, T_Hi); 2212 _mov(DestHi, T_Hi);
2213
2214 return;
2215 }
2216
2217 Operand *NewSrc;
2218 if (Dest->hasReg()) {
2219 // If Dest already has a physical register, then legalize the Src operand
2220 // into a Variable with the same register assignment. This especially
2221 // helps allow the use of Flex operands.
2222 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1855 } else { 2223 } else {
1856 Operand *NewSrc; 2224 // Dest could be a stack operand. Since we could potentially need to do a
1857 if (Dest->hasReg()) { 2225 // Store (and store can only have Register operands), legalize this to a
1858 // If Dest already has a physical register, then legalize the Src operand 2226 // register.
1859 // into a Variable with the same register assignment. This especially 2227 NewSrc = legalize(Src0, Legal_Reg);
1860 // helps allow the use of Flex operands.
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1862 } else {
1863 // Dest could be a stack operand. Since we could potentially need to do a
1864 // Store (and store can only have Register operands), legalize this to a
1865 // register.
1866 NewSrc = legalize(Src0, Legal_Reg);
1867 }
1868 if (isVectorType(Dest->getType())) {
1869 Variable *SrcR = legalizeToReg(NewSrc);
1870 _mov(Dest, SrcR);
1871 } else if (isFloatingType(Dest->getType())) {
1872 Variable *SrcR = legalizeToReg(NewSrc);
1873 _mov(Dest, SrcR);
1874 } else {
1875 _mov(Dest, NewSrc);
1876 }
1877 } 2228 }
2229
2230 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
2231 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
2232 }
2233 _mov(Dest, NewSrc);
1878 } 2234 }
1879 2235
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2236 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2237 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2238 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2239 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2240 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2241
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2242 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2243
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
2573 struct { 2929 struct {
2574 CondARM32::Cond CC0; 2930 CondARM32::Cond CC0;
2575 CondARM32::Cond CC1; 2931 CondARM32::Cond CC1;
2576 } TableFcmp[] = { 2932 } TableFcmp[] = {
2577 #define X(val, CC0, CC1) \ 2933 #define X(val, CC0, CC1) \
2578 { CondARM32::CC0, CondARM32::CC1 } \ 2934 { CondARM32::CC0, CondARM32::CC1 } \
2579 , 2935 ,
2580 FCMPARM32_TABLE 2936 FCMPARM32_TABLE
2581 #undef X 2937 #undef X
2582 }; 2938 };
2939
2940 bool isFloatingPointZero(Operand *Src) {
2941 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2942 return Utils::isPositiveZero(F32->getValue());
2943 }
2944
2945 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2946 return Utils::isPositiveZero(F64->getValue());
2947 }
2948
2949 return false;
2950 }
2583 } // end of anonymous namespace 2951 } // end of anonymous namespace
2584 2952
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 2953 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
2586 InstFcmp::FCond Condition = Instr->getCondition(); 2954 InstFcmp::FCond Condition = Instr->getCondition();
2587 switch (Condition) { 2955 switch (Condition) {
2588 case InstFcmp::False: 2956 case InstFcmp::False:
2589 return CondWhenTrue(CondARM32::kNone); 2957 return CondWhenTrue(CondARM32::kNone);
2590 case InstFcmp::True: 2958 case InstFcmp::True:
2591 return CondWhenTrue(CondARM32::AL); 2959 return CondWhenTrue(CondARM32::AL);
2592 break; 2960 break;
2593 default: { 2961 default: {
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); 2962 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); 2963 Operand *Src1 = Instr->getSrc(1);
2596 _vcmp(Src0R, Src1R); 2964 if (isFloatingPointZero(Src1)) {
2965 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
2966 } else {
2967 _vcmp(Src0R, legalizeToReg(Src1));
2968 }
2597 _vmrs(); 2969 _vmrs();
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); 2970 assert(Condition < llvm::array_lengthof(TableFcmp));
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); 2971 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
2600 } 2972 }
2601 } 2973 }
2602 } 2974 }
2603 2975
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2976 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2977 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2978 if (isVectorType(Dest->getType())) {
(...skipping 28 matching lines...) Expand all
2635 _mov(T, _1, Cond.WhenTrue0); 3007 _mov(T, _1, Cond.WhenTrue0);
2636 } 3008 }
2637 3009
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 3010 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 3011 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 3012 }
2641 3013
2642 _mov(Dest, T); 3014 _mov(Dest, T);
2643 } 3015 }
2644 3016
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { 3017 TargetARM32::CondWhenTrue
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); 3018 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); 3019 Operand *Src1) {
3020 size_t Index = static_cast<size_t>(Condition);
3021 assert(Index < llvm::array_lengthof(TableIcmp64));
2648 3022
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 3023 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 3024 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
3025 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
3026 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
3027
3028 if (SrcsLo.hasConstOperand()) {
3029 const uint32_t ValueLo = SrcsLo.getConstantValue();
3030 const uint32_t ValueHi = SrcsHi.getConstantValue();
3031 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
3032 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
3033 Value == 0) {
3034 Variable *T = makeReg(IceType_i32);
3035 Variable *Src0LoR = SrcsLo.src0R(this);
3036 Variable *Src0HiR = SrcsHi.src0R(this);
3037 _orrs(T, Src0LoR, Src0HiR);
3038 Context.insert(InstFakeUse::create(Func, T));
3039 return CondWhenTrue(TableIcmp64[Index].C1);
3040 }
3041
3042 Variable *Src0RLo = SrcsLo.src0R(this);
3043 Variable *Src0RHi = SrcsHi.src0R(this);
3044 Operand *Src1RFLo = SrcsLo.src1RF(this);
3045 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
3046
3047 const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands();
3048
3049 if (UseRsb) {
3050 if (TableIcmp64[Index].IsSigned) {
3051 Variable *T = makeReg(IceType_i32);
3052 _rsbs(T, Src0RLo, Src1RFLo);
3053 Context.insert(InstFakeUse::create(Func, T));
3054
3055 T = makeReg(IceType_i32);
3056 _rscs(T, Src0RHi, Src1RFHi);
3057 // We need to add a FakeUse here because liveness gets mad at us (Def
3058 // without Use.) Note that flag-setting instructions are considered to
3059 // have side effects and, therefore, are not DCE'ed.
3060 Context.insert(InstFakeUse::create(Func, T));
3061 } else {
3062 Variable *T = makeReg(IceType_i32);
3063 _rsbs(T, Src0RHi, Src1RFHi);
3064 Context.insert(InstFakeUse::create(Func, T));
3065
3066 T = makeReg(IceType_i32);
3067 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
3068 Context.insert(InstFakeUse::create(Func, T));
3069 }
3070 } else {
3071 if (TableIcmp64[Index].IsSigned) {
3072 _cmp(Src0RLo, Src1RFLo);
3073 Variable *T = makeReg(IceType_i32);
3074 _sbcs(T, Src0RHi, Src1RFHi);
3075 Context.insert(InstFakeUse::create(Func, T));
3076 } else {
3077 _cmp(Src0RHi, Src1RFHi);
3078 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
3079 }
3080 }
3081
3082 return CondWhenTrue(TableIcmp64[Index].C1);
3083 }
3084
3085 Variable *Src0RLo, *Src0RHi;
3086 Operand *Src1RFLo, *Src1RFHi;
3087 if (TableIcmp64[Index].Swapped) {
3088 Src0RLo = legalizeToReg(loOperand(Src1));
3089 Src0RHi = legalizeToReg(hiOperand(Src1));
3090 Src1RFLo = legalizeToReg(loOperand(Src0));
3091 Src1RFHi = legalizeToReg(hiOperand(Src0));
3092 } else {
3093 Src0RLo = legalizeToReg(loOperand(Src0));
3094 Src0RHi = legalizeToReg(hiOperand(Src0));
3095 Src1RFLo = legalizeToReg(loOperand(Src1));
3096 Src1RFHi = legalizeToReg(hiOperand(Src1));
3097 }
2651 3098
2652 // a=icmp cond, b, c ==> 3099 // a=icmp cond, b, c ==>
2653 // GCC does: 3100 // GCC does:
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo 3101 // cmp b.hi, c.hi or cmp b.lo, c.lo
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 3102 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2656 // mov.<C1> t, #1 mov.<C1> t, #1 3103 // mov.<C1> t, #1 mov.<C1> t, #1
2657 // mov.<C2> t, #0 mov.<C2> t, #0 3104 // mov.<C2> t, #0 mov.<C2> t, #0
2658 // mov a, t mov a, t 3105 // mov a, t mov a, t
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 3106 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2660 // is used for signed compares. In some cases, b and c need to be swapped as 3107 // is used for signed compares. In some cases, b and c need to be swapped as
(...skipping 10 matching lines...) Expand all
2671 // that's nice in that it's just as short but has fewer dependencies for 3118 // that's nice in that it's just as short but has fewer dependencies for
2672 // better ILP at the cost of more registers. 3119 // better ILP at the cost of more registers.
2673 // 3120 //
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 3121 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one 3122 // unconditional mov #0, two cmps, two conditional mov #1, and one
2676 // conditional reg mov. That has few dependencies for good ILP, but is a 3123 // conditional reg mov. That has few dependencies for good ILP, but is a
2677 // longer sequence. 3124 // longer sequence.
2678 // 3125 //
2679 // So, we are going with the GCC version since it's usually better (except 3126 // So, we are going with the GCC version since it's usually better (except
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 3127 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
3128 if (TableIcmp64[Index].IsSigned) {
3129 Variable *ScratchReg = makeReg(IceType_i32);
3130 _cmp(Src0RLo, Src1RFLo);
3131 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
3132 // ScratchReg isn't going to be used, but we need the side-effect of
3133 // setting flags from this operation.
3134 Context.insert(InstFakeUse::create(Func, ScratchReg));
3135 } else {
3136 _cmp(Src0RHi, Src1RFHi);
3137 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
3138 }
3139 return CondWhenTrue(TableIcmp64[Index].C1);
3140 }
2681 3141
2682 if (Src0->getType() == IceType_i64) { 3142 TargetARM32::CondWhenTrue
2683 InstIcmp::ICond Conditon = Inst->getCondition(); 3143 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2684 size_t Index = static_cast<size_t>(Conditon); 3144 Operand *Src1) {
2685 assert(Index < llvm::array_lengthof(TableIcmp64)); 3145 Int32Operands Srcs(Src0, Src1);
2686 Variable *Src0Lo, *Src0Hi; 3146 if (!Srcs.hasConstOperand()) {
2687 Operand *Src1LoRF, *Src1HiRF; 3147
2688 if (TableIcmp64[Index].Swapped) { 3148 Variable *Src0R = Srcs.src0R(this);
2689 Src0Lo = legalizeToReg(loOperand(Src1)); 3149 Operand *Src1RF = Srcs.src1RF(this);
2690 Src0Hi = legalizeToReg(hiOperand(Src1)); 3150 _cmp(Src0R, Src1RF);
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 3151 return CondWhenTrue(getIcmp32Mapping(Condition));
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2693 } else {
2694 Src0Lo = legalizeToReg(loOperand(Src0));
2695 Src0Hi = legalizeToReg(hiOperand(Src0));
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2698 }
2699 if (TableIcmp64[Index].IsSigned) {
2700 Variable *ScratchReg = makeReg(IceType_i32);
2701 _cmp(Src0Lo, Src1LoRF);
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2703 // ScratchReg isn't going to be used, but we need the side-effect of
2704 // setting flags from this operation.
2705 Context.insert(InstFakeUse::create(Func, ScratchReg));
2706 } else {
2707 _cmp(Src0Hi, Src1HiRF);
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2709 }
2710 return CondWhenTrue(TableIcmp64[Index].C1);
2711 } 3152 }
2712 3153
3154 Variable *Src0R = Srcs.src0R(this);
3155 const int32_t Value = Srcs.getConstantValue();
3156 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
3157 _tst(Src0R, Src0R);
3158 return CondWhenTrue(getIcmp32Mapping(Condition));
3159 }
3160
3161 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
3162 Srcs.negatedImmediateIsFlexEncodable()) {
3163 Operand *Src1F = Srcs.negatedSrc1F(this);
3164 _cmn(Src0R, Src1F);
3165 return CondWhenTrue(getIcmp32Mapping(Condition));
3166 }
3167
3168 Operand *Src1RF = Srcs.src1RF(this);
3169 if (!Srcs.swappedOperands()) {
3170 _cmp(Src0R, Src1RF);
3171 } else {
3172 Variable *T = makeReg(IceType_i32);
3173 _rsbs(T, Src0R, Src1RF);
3174 Context.insert(InstFakeUse::create(Func, T));
3175 }
3176 return CondWhenTrue(getIcmp32Mapping(Condition));
3177 }
3178
3179 TargetARM32::CondWhenTrue
3180 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
3181 Operand *Src1) {
3182 Int32Operands Srcs(Src0, Src1);
3183 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
3184 assert(ShAmt >= 0);
3185
3186 if (!Srcs.hasConstOperand()) {
3187 Variable *Src0R = makeReg(IceType_i32);
3188 Operand *ShAmtF =
3189 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
3190 _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
3191
3192 Variable *Src1R = legalizeToReg(Src1);
3193 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
3194 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
3195 _cmp(Src0R, Src1F);
3196 return CondWhenTrue(getIcmp32Mapping(Condition));
3197 }
3198
3199 const int32_t Value = Srcs.getConstantValue();
3200 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
3201 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
3202 Variable *T = makeReg(IceType_i32);
3203 _lsls(T, Srcs.src0R(this), ShAmtOp);
3204 Context.insert(InstFakeUse::create(Func, T));
3205 return CondWhenTrue(getIcmp32Mapping(Condition));
3206 }
3207
3208 Variable *ConstR = makeReg(IceType_i32);
3209 _mov(ConstR,
3210 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
3211 Operand *NonConstF = OperandARM32FlexReg::create(
3212 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
3213 Ctx->getConstantInt32(ShAmt));
3214
3215 if (Srcs.swappedOperands()) {
3216 _cmp(ConstR, NonConstF);
3217 } else {
3218 Variable *T = makeReg(IceType_i32);
3219 _rsbs(T, ConstR, NonConstF);
3220 Context.insert(InstFakeUse::create(Func, T));
3221 }
3222 return CondWhenTrue(getIcmp32Mapping(Condition));
3223 }
3224
3225 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
3226 assert(Inst->getSrc(0)->getType() != IceType_i1);
3227 assert(Inst->getSrc(1)->getType() != IceType_i1);
3228
3229 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
3230 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
3231
3232 const InstIcmp::ICond Condition = Inst->getCondition();
2713 // a=icmp cond b, c ==> 3233 // a=icmp cond b, c ==>
2714 // GCC does: 3234 // GCC does:
2715 // <u/s>xtb tb, b 3235 // <u/s>xtb tb, b
2716 // <u/s>xtb tc, c 3236 // <u/s>xtb tc, c
2717 // cmp tb, tc 3237 // cmp tb, tc
2718 // mov.C1 t, #0 3238 // mov.C1 t, #0
2719 // mov.C2 t, #1 3239 // mov.C2 t, #1
2720 // mov a, t 3240 // mov a, t
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have 3241 // where the unsigned/sign extension is not needed for 32-bit. They also have
2722 // special cases for EQ and NE. E.g., for NE: 3242 // special cases for EQ and NE. E.g., for NE:
2723 // <extend to tb, tc> 3243 // <extend to tb, tc>
2724 // subs t, tb, tc 3244 // subs t, tb, tc
2725 // movne t, #1 3245 // movne t, #1
2726 // mov a, t 3246 // mov a, t
2727 // 3247 //
2728 // LLVM does: 3248 // LLVM does:
2729 // lsl tb, b, #<N> 3249 // lsl tb, b, #<N>
2730 // mov t, #0 3250 // mov t, #0
2731 // cmp tb, c, lsl #<N> 3251 // cmp tb, c, lsl #<N>
2732 // mov.<C> t, #1 3252 // mov.<C> t, #1
2733 // mov a, t 3253 // mov a, t
2734 // 3254 //
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on 3255 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 3256 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb 3257 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2738 // first. It's not clear to me why that special-casing is needed. 3258 // first. It's not clear to me why that special-casing is needed.
2739 // 3259 //
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few 3260 // We'll go with the LLVM way for now, since it's shorter and has just as few
2741 // dependencies. 3261 // dependencies.
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 3262 switch (Src0->getType()) {
2743 assert(ShiftAmt >= 0); 3263 default:
2744 Constant *ShiftConst = nullptr; 3264 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
2745 Variable *Src0R = nullptr; 3265 case IceType_i8:
2746 if (ShiftAmt) { 3266 case IceType_i16:
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 3267 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
2748 Src0R = makeReg(IceType_i32); 3268 case IceType_i32:
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 3269 return lowerInt32IcmpCond(Condition, Src0, Src1);
2750 } else { 3270 case IceType_i64:
2751 Src0R = legalizeToReg(Src0); 3271 return lowerInt64IcmpCond(Condition, Src0, Src1);
2752 } 3272 }
2753 if (ShiftAmt) {
2754 Variable *Src1R = legalizeToReg(Src1);
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
2757 _cmp(Src0R, Src1RShifted);
2758 } else {
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2760 _cmp(Src0R, Src1RF);
2761 }
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
2763 } 3273 }
2764 3274
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3275 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3276 Variable *Dest = Inst->getDest();
2767 3277
2768 if (isVectorType(Dest->getType())) { 3278 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3279 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3280 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3281 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3282 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4757 }
4248 return Reg; 4758 return Reg;
4249 } 4759 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4760 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4761 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4762 _movw(Reg, C);
4253 _movt(Reg, C); 4763 _movt(Reg, C);
4254 return Reg; 4764 return Reg;
4255 } else { 4765 } else {
4256 assert(isScalarFloatingType(Ty)); 4766 assert(isScalarFloatingType(Ty));
4767 uint32_t ModifiedImm;
4768 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4769 Variable *T = makeReg(Ty, RegNum);
4770 _mov(T,
4771 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4772 return T;
4773 }
4774
4775 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
4776 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
4777 // because ARM does not have a veor instruction with S registers.
4778 Variable *T = makeReg(IceType_f64, RegNum);
4779 Context.insert(InstFakeDef::create(Func, T));
4780 _veor(T, T, T);
4781 return T;
4782 }
4783
4257 // Load floats/doubles from literal pool. 4784 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4785 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4786 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4787 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4788 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4789 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4790 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4791 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4792 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4793 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4794 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5420 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5421 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5422 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5423 }
4903 5424
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5425 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5426 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5427 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5428
4908 } // end of namespace Ice 5429 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698