Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1296 return; 1296 return;
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); 1297 Variable *SrcLoReg = legalizeToReg(SrcLo);
1298 switch (Ty) { 1298 switch (Ty) {
1299 default: 1299 default:
1300 llvm_unreachable("Unexpected type"); 1300 llvm::report_fatal_error("Unexpected type");
1301 case IceType_i8: { 1301 case IceType_i8:
1302 Operand *Mask =
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
1304 _tst(SrcLoReg, Mask);
1305 break;
1306 }
1307 case IceType_i16: { 1302 case IceType_i16: {
1308 Operand *Mask = 1303 Operand *ShAmtF =
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
1310 _tst(SrcLoReg, Mask); 1305 Legal_Reg | Legal_Flex);
1311 break; 1306 Variable *T = makeReg(IceType_i32);
1312 } 1307 _lsls(T, SrcLoReg, ShAmtF);
1308 Context.insert(InstFakeUse::create(Func, T));
1309 } break;
1313 case IceType_i32: { 1310 case IceType_i32: {
1314 _tst(SrcLoReg, SrcLoReg); 1311 _tst(SrcLoReg, SrcLoReg);
1315 break; 1312 break;
1316 } 1313 }
1317 case IceType_i64: { 1314 case IceType_i64: {
1318 Variable *ScratchReg = makeReg(IceType_i32); 1315 Variable *T = makeReg(IceType_i32);
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
1320 // ScratchReg isn't going to be used, but we need the side-effect of 1317 // T isn't going to be used, but we need the side-effect of setting flags
1321 // setting flags from this operation. 1318 // from this operation.
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1319 Context.insert(InstFakeUse::create(Func, T));
1323 } 1320 }
1324 } 1321 }
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); 1322 InstARM32Label *Label = InstARM32Label::create(Func, this);
1326 _br(Label, CondARM32::NE); 1323 _br(Label, CondARM32::NE);
1327 _trap(); 1324 _trap();
1328 Context.insert(Label); 1325 Context.insert(Label);
1329 } 1326 }
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
1404 namespace {
1405 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
 1406 // It holds the operation's two sources, and maintains some state as to whether
Jim Stichnoth 2015/11/16 23:06:26 operations'
John 2015/11/17 22:17:05 well, this is the two sources of the operation n
1407 // one of them is a constant. If one of the operands is a constant, then it will
1408 // be stored as the operation's second source, with a bit indicating whether the
1409 // operands were swapped.
1410 //
 1411 // The class is split into a base class with operand type-independent methods, and
Jim Stichnoth 2015/11/16 23:06:26 split into ?
John 2015/11/17 22:17:05 Done.
1412 // a derived, templated class, for each type of operand we want to fold
1413 // constants for:
1414 //
1415 // NumericOperandsBase --> NumericOperands<ConstantFloat>
1416 // --> NumericOperands<ConstantDouble>
1417 // --> NumericOperands<ConstantInt32>
1418 //
 1419 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
Jim Stichnoth 2015/11/16 23:06:26 emitting
John 2015/11/17 22:17:05 Done.
1420 // inverted/negated immediates.
1421 class NumericOperandsBase {
1422 NumericOperandsBase() = delete;
1423 NumericOperandsBase(const NumericOperandsBase &) = delete;
1424 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
1425
1426 public:
1427 NumericOperandsBase(Operand *S0, Operand *S1)
1428 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
1429 Swapped(Src0 == S1 && S0 != S1) {
1430 assert(Src0 != nullptr);
1431 assert(Src1 != nullptr);
1432 assert(Src0 != Src1 || S0 == S1);
1433 }
1434
1435 bool hasConstOperand() const {
1436 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
1437 }
1438
1439 bool swappedOperands() const { return Swapped; }
1440
1441 Variable *src0R(TargetARM32 *Target) const {
1442 return legalizeToReg(Target, Src0);
1443 }
1444
1445 Variable *unswappedSrc0R(TargetARM32 *Target) const {
1446 return legalizeToReg(Target, Swapped ? Src1 : Src0);
1447 }
1448
1449 Operand *src1RF(TargetARM32 *Target) const {
1450 return legalizeToRegOrFlex(Target, Src1);
1451 }
1452
1453 Variable *unswappedSrc1R(TargetARM32 *Target) const {
1454 return legalizeToReg(Target, Swapped ? Src0 : Src1);
1455 }
1456
1457 Operand *unswappedSrc1RF(TargetARM32 *Target) const {
1458 return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1);
1459 }
1460
1461 protected:
1462 Operand *const Src0;
1463 Operand *const Src1;
1464 const bool Swapped;
1465
1466 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
1467 return Target->legalizeToReg(Src);
1468 }
1469
1470 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
1471 return Target->legalize(Src,
1472 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
1473 }
1474
1475 private:
1476 static Operand *NonConstOperand(Operand *S0, Operand *S1) {
1477 if (!llvm::isa<Constant>(S0))
1478 return S0;
1479 if (!llvm::isa<Constant>(S1))
1480 return S1;
1481 if (llvm::isa<ConstantRelocatable>(S1) &&
1482 !llvm::isa<ConstantRelocatable>(S0))
1483 return S1;
1484 return S0;
1485 }
1486
1487 static Operand *ConstOperand(Operand *S0, Operand *S1) {
1488 if (!llvm::isa<Constant>(S0))
1489 return S1;
1490 if (!llvm::isa<Constant>(S1))
1491 return S0;
1492 if (llvm::isa<ConstantRelocatable>(S1) &&
1493 !llvm::isa<ConstantRelocatable>(S0))
1494 return S0;
1495 return S1;
1496 }
1497 };
1498
// Typed refinement of NumericOperandsBase for one concrete constant class C
// (e.g. ConstantFloat, ConstantDouble, ConstantInteger32), adding access to
// the folded constant's primitive value.
template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    // Any constant folded into Src1 must be of the expected class C.
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  // Value of the folded constant; only meaningful when hasConstOperand().
  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};
1513
1514 using FloatOperands = NumericOperands<ConstantFloat>;
1515 using DoubleOperands = NumericOperands<ConstantDouble>;
1516
1517 class Int32Operands : public NumericOperands<ConstantInteger32> {
1518 Int32Operands() = delete;
1519 Int32Operands(const Int32Operands &) = delete;
1520 Int32Operands &operator=(const Int32Operands &) = delete;
1521
1522 public:
1523 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
1524
1525 bool immediateIsFlexEncodable() const {
1526 uint32_t Rotate, Imm8;
1527 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
1528 }
1529
1530 bool negatedImmediateIsFlexEncodable() const {
1531 uint32_t Rotate, Imm8;
1532 return OperandARM32FlexImm::canHoldImm(
1533 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
1534 }
1535
1536 Operand *negatedSrc1F(TargetARM32 *Target) const {
1537 return legalizeToRegOrFlex(Target,
1538 Target->getCtx()->getConstantInt32(
1539 -static_cast<int32_t>(getConstantValue())));
1540 }
1541
1542 bool invertedImmediateIsFlexEncodable() const {
1543 uint32_t Rotate, Imm8;
1544 return OperandARM32FlexImm::canHoldImm(
1545 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
1546 }
1547
1548 Operand *invertedSrc1F(TargetARM32 *Target) const {
1549 return legalizeToRegOrFlex(Target,
1550 Target->getCtx()->getConstantInt32(
1551 ~static_cast<uint32_t>(getConstantValue())));
1552 }
1553 };
1554 } // end of anonymous namespace
1555
1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1557 Variable *Dest, Operand *Src0,
1558 Operand *Src1) {
1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
1563
1564 // These helper-call-involved instructions are lowered in this separate
1565 // switch. This is because we would otherwise assume that we need to
1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1567 // helper calls, and such unused/redundant instructions will fail liveness
1568 // analysis under -Om1 setting.
1569 switch (Op) {
1570 default:
1571 break;
1572 case InstArithmetic::Udiv:
1573 case InstArithmetic::Sdiv:
1574 case InstArithmetic::Urem:
1575 case InstArithmetic::Srem: {
1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1578 // register, which will hide a constant source operand. Instead, check
1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) {
1582 _trap();
1583 return;
1584 }
1585 } else {
1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this);
1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this);
1588 div0Check(IceType_i64, Src1Lo, Src1Hi);
1589 }
1590 // Technically, ARM has its own aeabi routines, but we can use the
1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1592 // the more standard __moddi3 for rem.
1593 const char *HelperName = "";
1594 switch (Op) {
1595 default:
1596 llvm::report_fatal_error("Should have only matched div ops.");
1597 break;
1598 case InstArithmetic::Udiv:
1599 HelperName = H_udiv_i64;
1600 break;
1601 case InstArithmetic::Sdiv:
1602 HelperName = H_sdiv_i64;
1603 break;
1604 case InstArithmetic::Urem:
1605 HelperName = H_urem_i64;
1606 break;
1607 case InstArithmetic::Srem:
1608 HelperName = H_srem_i64;
1609 break;
1610 }
1611 constexpr SizeT MaxSrcs = 2;
1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1613 Call->addArg(Src0);
1614 Call->addArg(Src1);
1615 lowerCall(Call);
1616 return;
1617 }
1618 }
1619
1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1622 Variable *T_Lo = makeReg(DestLo->getType());
1623 Variable *T_Hi = makeReg(DestHi->getType());
1624
1625 switch (Op) {
1626 case InstArithmetic::_num:
1627 llvm::report_fatal_error("Unknown arithmetic operator");
1628 return;
1629 case InstArithmetic::Add:
1630 _adds(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
Jim Stichnoth 2015/11/16 23:06:26 Don't do this. src0R() and src1RF() have side eff
John 2015/11/17 22:17:05 doh... done.
1631 _mov(DestLo, T_Lo);
1632 _adc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1633 _mov(DestHi, T_Hi);
1634 return;
1635 case InstArithmetic::And:
1636 _and(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1637 _mov(DestLo, T_Lo);
1638 _and(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1639 _mov(DestHi, T_Hi);
1640 return;
1641 case InstArithmetic::Or:
1642 _orr(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1643 _mov(DestLo, T_Lo);
1644 _orr(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1645 _mov(DestHi, T_Hi);
1646 return;
1647 case InstArithmetic::Xor:
1648 _eor(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1649 _mov(DestLo, T_Lo);
1650 _eor(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1651 _mov(DestHi, T_Hi);
1652 return;
1653 case InstArithmetic::Sub:
1654 if (SrcsLo.swappedOperands()) {
1655 _rsbs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1656 _mov(DestLo, T_Lo);
1657 _rsc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1658 _mov(DestHi, T_Hi);
1659 } else {
1660 _subs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1661 _mov(DestLo, T_Lo);
1662 _sbc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1663 _mov(DestHi, T_Hi);
1664 }
1665 return;
1666 case InstArithmetic::Mul: {
1667 // GCC 4.8 does:
1668 // a=b*c ==>
1669 // t_acc =(mul) (b.lo * c.hi)
1670 // t_acc =(mla) (c.lo * b.hi) + t_acc
1671 // t.hi,t.lo =(umull) b.lo * c.lo
1672 // t.hi += t_acc
1673 // a.lo = t.lo
1674 // a.hi = t.hi
1675 //
1676 // LLVM does:
1677 // t.hi,t.lo =(umull) b.lo * c.lo
1678 // t.hi =(mla) (b.lo * c.hi) + t.hi
1679 // t.hi =(mla) (b.hi * c.lo) + t.hi
1680 // a.lo = t.lo
1681 // a.hi = t.hi
1682 //
1683 // LLVM's lowering has fewer instructions, but more register pressure:
1684 // t.lo is live from beginning to end, while GCC delays the two-dest
1685 // instruction till the end, and kills c.hi immediately.
1686 Variable *T_Acc = makeReg(IceType_i32);
1687 Variable *T_Acc1 = makeReg(IceType_i32);
1688 Variable *T_Hi1 = makeReg(IceType_i32);
1689 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1690 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1691 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1692 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
1693 _mul(T_Acc, Src0RLo, Src1RHi);
1694 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1695 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1696 _add(T_Hi, T_Hi1, T_Acc1);
1697 _mov(DestLo, T_Lo);
1698 _mov(DestHi, T_Hi);
1699 return;
1700 }
1701 case InstArithmetic::Shl: {
1702 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1703 Variable *Src0RLo = SrcsLo.src0R(this);
1704 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1705 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1706 if (ShAmtImm == 0) {
1707 _mov(DestLo, Src0RLo);
1708 _mov(DestHi, SrcsHi.src0R(this));
1709 return;
1710 }
1711
1712 if (ShAmtImm >= 32) {
1713 if (ShAmtImm == 32) {
1714 _mov(DestHi, Src0RLo);
1715 } else {
1716 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1717 Legal_Reg | Legal_Flex);
1718 _lsl(T_Hi, Src0RLo, ShAmtOp);
1719 _mov(DestHi, T_Hi);
1720 }
1721
1722 Operand *_0 =
1723 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1724 _mov(T_Lo, _0);
1725 _mov(DestLo, T_Lo);
1726 return;
1727 }
1728
1729 Variable *Src0RHi = SrcsHi.src0R(this);
1730 Operand *ShAmtOp =
1731 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1732 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1733 Legal_Reg | Legal_Flex);
1734 _lsl(T_Hi, Src0RHi, ShAmtOp);
1735 _orr(T_Hi, T_Hi,
1736 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1737 OperandARM32::LSR, ComplShAmtOp));
1738 _mov(DestHi, T_Hi);
1739
1740 _lsl(T_Lo, Src0RLo, ShAmtOp);
1741 _mov(DestLo, T_Lo);
1742 return;
1743 }
1744
1745 // a=b<<c ==>
1746 // pnacl-llc does:
1747 // mov t_b.lo, b.lo
1748 // mov t_b.hi, b.hi
1749 // mov t_c.lo, c.lo
1750 // rsb T0, t_c.lo, #32
1751 // lsr T1, t_b.lo, T0
1752 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1753 // sub T2, t_c.lo, #32
1754 // cmp T2, #0
1755 // lslge t_a.hi, t_b.lo, T2
1756 // lsl t_a.lo, t_b.lo, t_c.lo
1757 // mov a.lo, t_a.lo
1758 // mov a.hi, t_a.hi
1759 //
1760 // GCC 4.8 does:
1761 // sub t_c1, c.lo, #32
1762 // lsl t_hi, b.hi, c.lo
1763 // orr t_hi, t_hi, b.lo, lsl t_c1
1764 // rsb t_c2, c.lo, #32
1765 // orr t_hi, t_hi, b.lo, lsr t_c2
1766 // lsl t_lo, b.lo, c.lo
1767 // a.lo = t_lo
1768 // a.hi = t_hi
1769 //
1770 // These are incompatible, therefore we mimic pnacl-llc.
1771 // Can be strength-reduced for constant-shifts, but we don't do that for
1772 // now.
1773 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1774 // ARM, shifts only take the lower 8 bits of the shift register, and
1775 // saturate to the range 0-32, so the negative value will saturate to 32.
1776 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
Jim Stichnoth 2015/11/16 23:06:26 Maybe this should be named _32RF?
John 2015/11/17 22:17:05 I'd rather not. This is the number 32, not somethi
1777 Operand *_0 =
1778 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1779 Variable *T0 = makeReg(IceType_i32);
1780 Variable *T1 = makeReg(IceType_i32);
1781 Variable *T2 = makeReg(IceType_i32);
1782 Variable *TA_Hi = makeReg(IceType_i32);
1783 Variable *TA_Lo = makeReg(IceType_i32);
1784 Variable *Src0RLo = SrcsLo.src0R(this);
1785 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1786 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1787 _rsb(T0, Src1RLo, _32);
1788 _lsr(T1, Src0RLo, T0);
1789 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1790 OperandARM32::LSL, Src1RLo));
1791 _sub(T2, Src1RLo, _32);
1792 _cmp(T2, _0);
1793 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1794 _set_dest_redefined();
1795 _lsl(TA_Lo, Src0RLo, Src1RLo);
1796 _mov(DestLo, TA_Lo);
1797 _mov(DestHi, TA_Hi);
1798 return;
1799 }
1800 case InstArithmetic::Lshr:
1801 case InstArithmetic::Ashr: {
1802 const bool ASR = Op == InstArithmetic::Ashr;
1803 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1804 Variable *Src0RHi = SrcsHi.src0R(this);
1805 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1806 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1807 if (ShAmtImm == 0) {
1808 _mov(DestHi, Src0RHi);
1809 _mov(DestLo, SrcsLo.src0R(this));
1810 return;
1811 }
1812
1813 if (ShAmtImm >= 32) {
1814 if (ShAmtImm == 32) {
1815 _mov(DestLo, Src0RHi);
1816 } else {
1817 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1818 Legal_Reg | Legal_Flex);
1819 if (ASR) {
1820 _asr(T_Lo, Src0RHi, ShAmtOp);
1821 } else {
1822 _lsr(T_Lo, Src0RHi, ShAmtOp);
1823 }
1824 _mov(DestLo, T_Lo);
1825 }
1826
1827 if (ASR) {
1828 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1829 Legal_Reg | Legal_Flex);
1830 _asr(T_Hi, Src0RHi, _31);
1831 } else {
1832 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1833 Legal_Reg | Legal_Flex);
1834 _mov(T_Hi, _0);
1835 }
1836 _mov(DestHi, T_Hi);
1837 return;
1838 }
1839
1840 Variable *Src0RLo = SrcsLo.src0R(this);
1841 Operand *ShAmtOp =
1842 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1843 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1844 Legal_Reg | Legal_Flex);
1845 _lsr(T_Lo, Src0RLo, ShAmtOp);
1846 _orr(T_Lo, T_Lo,
1847 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1848 OperandARM32::LSL, ComplShAmtOp));
1849 _mov(DestLo, T_Lo);
1850
1851 if (ASR) {
1852 _asr(T_Hi, Src0RHi, ShAmtOp);
1853 } else {
1854 _lsr(T_Hi, Src0RHi, ShAmtOp);
1855 }
1856 _mov(DestHi, T_Hi);
1857 return;
1858 }
1859
1860 // a=b>>c
1861 // pnacl-llc does:
1862 // mov t_b.lo, b.lo
1863 // mov t_b.hi, b.hi
1864 // mov t_c.lo, c.lo
1865 // lsr T0, t_b.lo, t_c.lo
1866 // rsb T1, t_c.lo, #32
1867 // orr t_a.lo, T0, t_b.hi, lsl T1
1868 // sub T2, t_c.lo, #32
1869 // cmp T2, #0
1870 // [al]srge t_a.lo, t_b.hi, T2
1871 // [al]sr t_a.hi, t_b.hi, t_c.lo
1872 // mov a.lo, t_a.lo
1873 // mov a.hi, t_a.hi
1874 //
1875 // GCC 4.8 does (lsr):
1876 // rsb t_c1, c.lo, #32
1877 // lsr t_lo, b.lo, c.lo
1878 // orr t_lo, t_lo, b.hi, lsl t_c1
1879 // sub t_c2, c.lo, #32
1880 // orr t_lo, t_lo, b.hi, lsr t_c2
1881 // lsr t_hi, b.hi, c.lo
1882 // mov a.lo, t_lo
1883 // mov a.hi, t_hi
1884 //
1885 // These are incompatible, therefore we mimic pnacl-llc.
1886 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1887 Operand *_0 =
1888 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1889 Variable *T0 = makeReg(IceType_i32);
1890 Variable *T1 = makeReg(IceType_i32);
1891 Variable *T2 = makeReg(IceType_i32);
1892 Variable *TA_Lo = makeReg(IceType_i32);
1893 Variable *TA_Hi = makeReg(IceType_i32);
1894 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1895 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1896 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1897 _lsr(T0, Src0RLo, Src1RLo);
1898 _rsb(T1, Src1RLo, _32);
1899 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1900 OperandARM32::LSL, T1));
1901 _sub(T2, Src1RLo, _32);
1902 _cmp(T2, _0);
1903 if (ASR) {
1904 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1905 _set_dest_redefined();
1906 _asr(TA_Hi, Src0RHi, Src1RLo);
1907 } else {
1908 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1909 _set_dest_redefined();
1910 _lsr(TA_Hi, Src0RHi, Src1RLo);
1911 }
1912 _mov(DestLo, TA_Lo);
1913 _mov(DestHi, TA_Hi);
1914 return;
1915 }
1916 case InstArithmetic::Fadd:
1917 case InstArithmetic::Fsub:
1918 case InstArithmetic::Fmul:
1919 case InstArithmetic::Fdiv:
1920 case InstArithmetic::Frem:
1921 llvm::report_fatal_error("FP instruction with i64 type");
1922 return;
1923 case InstArithmetic::Udiv:
1924 case InstArithmetic::Sdiv:
1925 case InstArithmetic::Urem:
1926 case InstArithmetic::Srem:
1927 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1928 "should have already been handled before");
1929 return;
1930 }
1931 }
1932
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1933 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1934 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1935 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1936 lowerInt1Arithmetic(Inst);
1411 return; 1937 return;
1412 } 1938 }
1413 1939
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1940 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1941 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1942 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1943 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to 1944 return;
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with 1945 }
1427 // helper calls, and such unused/redundant instructions will fail liveness 1946
1428 // analysis under -Om1 setting. 1947 if (isVectorType(Dest->getType())) {
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return;
1689 } else if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1948 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1949 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1950 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1951 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1952 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1953 return;
1696 } 1954 }
1955
1697 // Dest->getType() is a non-i64 scalar. 1956 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1957 Variable *T = makeReg(Dest->getType());
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1958
1959 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1960 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1961 // difficult to determine (constant may be moved to a register).
1962 // * Handle floating point arithmetic separately: they require Src1 to be
1963 // legalized to a register.
1703 switch (Inst->getOp()) { 1964 switch (Inst->getOp()) {
1704 default: 1965 default:
1705 break; 1966 break;
1706 case InstArithmetic::Udiv: { 1967 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1968 constexpr bool NotRemainder = false;
1969 Variable *Src0R = legalizeToReg(Src0);
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1970 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1971 H_udiv_i32, NotRemainder);
1710 return; 1972 return;
1711 } 1973 }
1712 case InstArithmetic::Sdiv: { 1974 case InstArithmetic::Sdiv: {
1713 constexpr bool NotRemainder = false; 1975 constexpr bool NotRemainder = false;
1976 Variable *Src0R = legalizeToReg(Src0);
1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 1977 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1715 H_sdiv_i32, NotRemainder); 1978 H_sdiv_i32, NotRemainder);
1716 return; 1979 return;
1717 } 1980 }
1718 case InstArithmetic::Urem: { 1981 case InstArithmetic::Urem: {
1719 constexpr bool IsRemainder = true; 1982 constexpr bool IsRemainder = true;
1983 Variable *Src0R = legalizeToReg(Src0);
1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1984 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1721 H_urem_i32, IsRemainder); 1985 H_urem_i32, IsRemainder);
1722 return; 1986 return;
1723 } 1987 }
1724 case InstArithmetic::Srem: { 1988 case InstArithmetic::Srem: {
1725 constexpr bool IsRemainder = true; 1989 constexpr bool IsRemainder = true;
1990 Variable *Src0R = legalizeToReg(Src0);
1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 1991 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1727 H_srem_i32, IsRemainder); 1992 H_srem_i32, IsRemainder);
1728 return; 1993 return;
1729 } 1994 }
1730 case InstArithmetic::Frem: { 1995 case InstArithmetic::Frem: {
1731 const SizeT MaxSrcs = 2; 1996 constexpr SizeT MaxSrcs = 2;
1997 Variable *Src0R = legalizeToReg(Src0);
1732 Type Ty = Dest->getType(); 1998 Type Ty = Dest->getType();
1733 InstCall *Call = makeHelperCall( 1999 InstCall *Call = makeHelperCall(
1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2000 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1735 Call->addArg(Src0R); 2001 Call->addArg(Src0R);
1736 Call->addArg(Src1); 2002 Call->addArg(Src1);
1737 lowerCall(Call); 2003 lowerCall(Call);
1738 return; 2004 return;
1739 } 2005 }
1740 }
1741
1742 // Handle floating point arithmetic separately: they require Src1 to be
1743 // legalized to a register.
1744 switch (Inst->getOp()) {
1745 default:
1746 break;
1747 case InstArithmetic::Fadd: { 2006 case InstArithmetic::Fadd: {
2007 Variable *Src0R = legalizeToReg(Src0);
1748 Variable *Src1R = legalizeToReg(Src1); 2008 Variable *Src1R = legalizeToReg(Src1);
1749 _vadd(T, Src0R, Src1R); 2009 _vadd(T, Src0R, Src1R);
1750 _mov(Dest, T); 2010 _mov(Dest, T);
1751 return; 2011 return;
1752 } 2012 }
1753 case InstArithmetic::Fsub: { 2013 case InstArithmetic::Fsub: {
2014 Variable *Src0R = legalizeToReg(Src0);
1754 Variable *Src1R = legalizeToReg(Src1); 2015 Variable *Src1R = legalizeToReg(Src1);
1755 _vsub(T, Src0R, Src1R); 2016 _vsub(T, Src0R, Src1R);
1756 _mov(Dest, T); 2017 _mov(Dest, T);
1757 return; 2018 return;
1758 } 2019 }
1759 case InstArithmetic::Fmul: { 2020 case InstArithmetic::Fmul: {
2021 Variable *Src0R = legalizeToReg(Src0);
1760 Variable *Src1R = legalizeToReg(Src1); 2022 Variable *Src1R = legalizeToReg(Src1);
1761 _vmul(T, Src0R, Src1R); 2023 _vmul(T, Src0R, Src1R);
1762 _mov(Dest, T); 2024 _mov(Dest, T);
1763 return; 2025 return;
1764 } 2026 }
1765 case InstArithmetic::Fdiv: { 2027 case InstArithmetic::Fdiv: {
2028 Variable *Src0R = legalizeToReg(Src0);
1766 Variable *Src1R = legalizeToReg(Src1); 2029 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 2030 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 2031 _mov(Dest, T);
1769 return; 2032 return;
1770 } 2033 }
1771 } 2034 }
1772 2035
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2036 // Handle everything else here.
2037 Int32Operands Srcs(Src0, Src1);
1774 switch (Inst->getOp()) { 2038 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 2039 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 2040 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 2041 return;
1778 case InstArithmetic::Add: 2042 case InstArithmetic::Add:
1779 _add(T, Src0R, Src1RF); 2043 if (Srcs.hasConstOperand()) {
2044 if (!Srcs.immediateIsFlexEncodable() &&
2045 Srcs.negatedImmediateIsFlexEncodable()) {
2046 Variable *Src0R = Srcs.src0R(this);
2047 Operand *Src1F = Srcs.negatedSrc1F(this);
2048 if (!Srcs.swappedOperands()) {
2049 _sub(T, Src0R, Src1F);
2050 } else {
2051 _rsb(T, Src0R, Src1F);
2052 }
2053 _mov(Dest, T);
2054 return;
2055 }
2056 }
2057 _add(T, Srcs.src0R(this), Srcs.src1RF(this));
1780 _mov(Dest, T); 2058 _mov(Dest, T);
1781 return; 2059 return;
1782 case InstArithmetic::And: 2060 case InstArithmetic::And:
1783 _and(T, Src0R, Src1RF); 2061 if (Srcs.hasConstOperand()) {
2062 if (!Srcs.immediateIsFlexEncodable() &&
2063 Srcs.invertedImmediateIsFlexEncodable()) {
2064 Variable *Src0R = Srcs.src0R(this);
2065 Operand *Src1F = Srcs.invertedSrc1F(this);
2066 _bic(T, Src0R, Src1F);
2067 _mov(Dest, T);
2068 return;
2069 }
2070 }
2071 _and(T, Srcs.src0R(this), Srcs.src1RF(this));
1784 _mov(Dest, T); 2072 _mov(Dest, T);
1785 return; 2073 return;
1786 case InstArithmetic::Or: 2074 case InstArithmetic::Or:
1787 _orr(T, Src0R, Src1RF); 2075 _orr(T, Srcs.src0R(this), Srcs.src1RF(this));
1788 _mov(Dest, T); 2076 _mov(Dest, T);
1789 return; 2077 return;
1790 case InstArithmetic::Xor: 2078 case InstArithmetic::Xor:
1791 _eor(T, Src0R, Src1RF); 2079 _eor(T, Srcs.src0R(this), Srcs.src1RF(this));
1792 _mov(Dest, T); 2080 _mov(Dest, T);
1793 return; 2081 return;
1794 case InstArithmetic::Sub: 2082 case InstArithmetic::Sub:
1795 _sub(T, Src0R, Src1RF); 2083 if (Srcs.hasConstOperand()) {
2084 if (Srcs.immediateIsFlexEncodable()) {
2085 if (Srcs.swappedOperands()) {
2086 _rsb(T, Srcs.src0R(this), Srcs.src1RF(this));
2087 } else {
2088 _sub(T, Srcs.src0R(this), Srcs.src1RF(this));
2089 }
2090 _mov(Dest, T);
2091 return;
2092 }
2093 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2094 _add(T, Srcs.src0R(this), Srcs.negatedSrc1F(this));
2095 _mov(Dest, T);
2096 return;
2097 }
2098 }
2099 _sub(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this));
1796 _mov(Dest, T); 2100 _mov(Dest, T);
1797 return; 2101 return;
1798 case InstArithmetic::Mul: { 2102 case InstArithmetic::Mul: {
1799 Variable *Src1R = legalizeToReg(Src1RF); 2103 _mul(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this));
1800 _mul(T, Src0R, Src1R);
1801 _mov(Dest, T); 2104 _mov(Dest, T);
1802 return; 2105 return;
1803 } 2106 }
1804 case InstArithmetic::Shl: 2107 case InstArithmetic::Shl: {
1805 _lsl(T, Src0R, Src1RF); 2108 _lsl(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1RF(this));
1806 _mov(Dest, T); 2109 _mov(Dest, T);
1807 return; 2110 return;
1808 case InstArithmetic::Lshr: 2111 }
2112 case InstArithmetic::Lshr: {
2113 Variable *Src0R = Srcs.unswappedSrc0R(this);
1809 if (Dest->getType() != IceType_i32) { 2114 if (Dest->getType() != IceType_i32) {
1810 _uxt(Src0R, Src0R); 2115 _uxt(Src0R, Src0R);
1811 } 2116 }
1812 _lsr(T, Src0R, Src1RF); 2117 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this));
1813 _mov(Dest, T); 2118 _mov(Dest, T);
1814 return; 2119 return;
1815 case InstArithmetic::Ashr: 2120 }
2121 case InstArithmetic::Ashr: {
2122 Variable *Src0R = Srcs.unswappedSrc0R(this);
1816 if (Dest->getType() != IceType_i32) { 2123 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 2124 _sxt(Src0R, Src0R);
1818 } 2125 }
1819 _asr(T, Src0R, Src1RF); 2126 _asr(T, Src0R, Srcs.unswappedSrc1RF(this));
1820 _mov(Dest, T); 2127 _mov(Dest, T);
1821 return; 2128 return;
2129 }
1822 case InstArithmetic::Udiv: 2130 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 2131 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 2132 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 2133 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 2134 llvm::report_fatal_error(
2135 "Integer div/rem should have been handled earlier.");
1827 return; 2136 return;
1828 case InstArithmetic::Fadd: 2137 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 2138 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 2139 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 2140 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 2141 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 2142 llvm::report_fatal_error(
2143 "Floating point arith should have been handled earlier.");
1834 return; 2144 return;
1835 } 2145 }
1836 } 2146 }
1837 2147
/// Lowers an assignment (Dest = Src0) for ARM32.
///
/// i64 assignments are split into two independent 32-bit moves (low half,
/// then high half), each staged through a fresh temporary register. All
/// other types are lowered as a single move after legalizing the source.
void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    // Replace undef with a legal operand before splitting into halves.
    Src0 = legalizeUndef(Src0);

    // Low 32 bits: stage through T_Lo so the source may be a flex operand.
    Variable *T_Lo = makeReg(IceType_i32);
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);

    // High 32 bits: same pattern, independent of the low half.
    Variable *T_Hi = makeReg(IceType_i32);
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);

    return;
  }

  Operand *NewSrc;
  if (Dest->hasReg()) {
    // If Dest already has a physical register, then legalize the Src operand
    // into a Variable with the same register assignment. This especially
    // helps allow the use of Flex operands.
    NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
  } else {
    // Dest could be a stack operand. Since we could potentially need to do a
    // Store (and store can only have Register operands), legalize this to a
    // register.
    NewSrc = legalize(Src0, Legal_Reg);
  }

  if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
    // Vector and scalar-FP moves accept register or memory sources only;
    // re-legalize to rule out flex operands.
    NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
  }
  _mov(Dest, NewSrc);
}
1879 2188
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2189 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2190 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2191 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2192 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2193 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2194
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2195 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2196
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Maps each InstFcmp condition to (up to) two ARM condition codes. A lowered
// fcmp is considered true when CC0 holds or when CC1 holds; CC1 may be kNone
// when a single condition code suffices. The rows are generated from
// FCMPARM32_TABLE so they stay in sync with the fcmp condition enumeration.
struct {
  CondARM32::Cond CC0;
  CondARM32::Cond CC1;
} TableFcmp[] = {
#define X(val, CC0, CC1)                                                       \
  { CondARM32::CC0, CondARM32::CC1 }                                           \
  ,
    FCMPARM32_TABLE
#undef X
};
2892
2893 bool isFloatingPointZero(Operand *Src) {
2894 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2895 return Utils::isPositiveZero(F32->getValue());
2896 }
2897
2898 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2899 return Utils::isPositiveZero(F64->getValue());
2900 }
2901
2902 return false;
2903 }
2583 } // end of anonymous namespace 2904 } // end of anonymous namespace
2584 2905
/// Emits the floating point compare for Instr (vcmp followed by vmrs to copy
/// the FP status flags into the core flags) and returns the condition
/// code(s) under which the comparison is true.
///
/// InstFcmp::False/True require no compare at all and map to kNone/AL. For
/// all other conditions, when the second source is the constant +0.0 the
/// immediate-zero form of vcmp is used instead of materializing the constant
/// in a register; otherwise both sources are legalized to registers.
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
  InstFcmp::FCond Condition = Instr->getCondition();
  switch (Condition) {
  case InstFcmp::False:
    // Never true: no compare needed.
    return CondWhenTrue(CondARM32::kNone);
  case InstFcmp::True:
    // Always true: no compare needed.
    return CondWhenTrue(CondARM32::AL);
    break;
  default: {
    Variable *Src0R = legalizeToReg(Instr->getSrc(0));
    Operand *Src1 = Instr->getSrc(1);
    if (isFloatingPointZero(Src1)) {
      // vcmp against the #0.0 flex operand -- avoids loading the constant.
      _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
    } else {
      _vcmp(Src0R, legalizeToReg(Src1));
    }
    _vmrs();
    assert(Condition < llvm::array_lengthof(TableFcmp));
    return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
  }
  }
}
2603 2928
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2929 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2930 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2931 if (isVectorType(Dest->getType())) {
(...skipping 28 matching lines...) Expand all
2635 _mov(T, _1, Cond.WhenTrue0); 2960 _mov(T, _1, Cond.WhenTrue0);
2636 } 2961 }
2637 2962
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 2963 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 2964 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 2965 }
2641 2966
2642 _mov(Dest, T); 2967 _mov(Dest, T);
2643 } 2968 }
2644 2969
/// Emits the flag-setting instruction sequence for a 64-bit icmp and returns
/// the condition code under which the comparison is true.
///
/// When one operand is constant, special cases are used: (x == 0 / x != 0)
/// becomes a single orrs of the two halves; otherwise a cmp/sbcs (signed) or
/// cmp/cmp.eq (unsigned) sequence is emitted, using rsbs/rscs variants when
/// the constant sits on the left of the comparison (operands swapped). The
/// non-constant path legalizes all four halves to registers and emits the
/// pnacl-llc-style sequence described in the comment below.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < llvm::array_lengthof(TableIcmp64));

  Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
  Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
  // Both halves must agree on whether a constant is present, and on which
  // side it appears.
  assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
  assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());

  if (SrcsLo.hasConstOperand()) {
    const uint32_t ValueLo = SrcsLo.getConstantValue();
    const uint32_t ValueHi = SrcsHi.getConstantValue();
    const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
    if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
        Value == 0) {
      // x == 0 / x != 0: orrs of the two halves sets Z in a single
      // flag-setting instruction.
      Variable *T = makeReg(IceType_i32);
      _orrs(T, SrcsLo.src0R(this), SrcsHi.src0R(this));
      Context.insert(InstFakeUse::create(Func, T));
      return CondWhenTrue(TableIcmp64[Index].C1);
    }

    Variable *Src0RLo = SrcsLo.src0R(this);
    Variable *Src0RHi = SrcsHi.src0R(this);
    Operand *Src1RFLo = SrcsLo.src1RF(this);
    // Reuse the low-half operand when both halves hold the same immediate.
    Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);

    // Use reverse-subtract forms when the table-mandated operand order
    // disagrees with where the constant actually sits.
    const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands();

    if (UseRsb) {
      if (TableIcmp64[Index].IsSigned) {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo);
        Context.insert(InstFakeUse::create(Func, T));

        T = makeReg(IceType_i32);
        _rscs(T, Src0RHi, Src1RFHi);
        // We need to add a FakeUse here because liveness gets mad at us (Def
        // without Use.) Note that flag-setting instructions are considered to
        // have side effects and, therefore, are not DCE'ed.
        Context.insert(InstFakeUse::create(Func, T));
      } else {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RHi, Src1RFHi);
        Context.insert(InstFakeUse::create(Func, T));

        T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
        Context.insert(InstFakeUse::create(Func, T));
      }
    } else {
      if (TableIcmp64[Index].IsSigned) {
        _cmp(Src0RLo, Src1RFLo);
        Variable *T = makeReg(IceType_i32);
        _sbcs(T, Src0RHi, Src1RFHi);
        Context.insert(InstFakeUse::create(Func, T));
      } else {
        _cmp(Src0RHi, Src1RFHi);
        _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
      }
    }

    return CondWhenTrue(TableIcmp64[Index].C1);
  }

  // Non-constant path: legalize all four halves to registers, honoring the
  // operand swap required by the condition table.
  Variable *Src0RLo, *Src0RHi;
  Operand *Src1RFLo, *Src1RFHi;
  if (TableIcmp64[Index].Swapped) {
    Src0RLo = legalizeToReg(loOperand(Src1));
    Src0RHi = legalizeToReg(hiOperand(Src1));
    Src1RFLo = legalizeToReg(loOperand(Src0));
    Src1RFHi = legalizeToReg(hiOperand(Src0));
  } else {
    Src0RLo = legalizeToReg(loOperand(Src0));
    Src0RHi = legalizeToReg(hiOperand(Src0));
    Src1RFLo = legalizeToReg(loOperand(Src1));
    Src1RFHi = legalizeToReg(hiOperand(Src1));
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp b.hi, c.hi     or  cmp b.lo, c.lo
  //   cmp.eq b.lo, c.lo      sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1         mov.<C1> t, #1
  //   mov.<C2> t, #0         mov.<C2> t, #0
  //   mov a, t               mov a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped
  // as well.
  //
  // LLVM does a similar thing for eq/ne, and for signed/unsigned <, <=, etc.
  // uses a longer sequence with two unconditional mov #0, two cmps, two
  // conditional mov #1, and one conditional reg mov. That has fewer
  // dependencies for better ILP, at the cost of more registers and a longer
  // sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  if (TableIcmp64[Index].IsSigned) {
    Variable *ScratchReg = makeReg(IceType_i32);
    _cmp(Src0RLo, Src1RFLo);
    _sbcs(ScratchReg, Src0RHi, Src1RFHi);
    // ScratchReg isn't going to be used, but we need the side-effect of
    // setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  } else {
    _cmp(Src0RHi, Src1RFHi);
    _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
  }
  return CondWhenTrue(TableIcmp64[Index].C1);
}
2681 3092
/// Emits the flag-setting compare for a 32-bit icmp and returns the
/// condition code under which the comparison is true.
///
/// Constant-operand special cases:
///  * x == 0 / x != 0 -> tst x, x (no immediate needed);
///  * an immediate that is not flex-encodable but whose negation is
///    -> cmn (compare-negative) with the negated immediate;
///  * a constant on the left of the comparison -> rsbs, since cmp requires
///    a register first operand.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  if (!Srcs.hasConstOperand()) {
    // No constant involved: plain register/flex cmp.
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _cmp(Src0R, Src1RF);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Variable *Src0R = Srcs.src0R(this);
  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    // Compare against zero without materializing the immediate.
    _tst(Src0R, Src0R);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
      Srcs.negatedImmediateIsFlexEncodable()) {
    // cmp can't encode the immediate, but cmn can encode its negation.
    Operand *Src1F = Srcs.negatedSrc1F(this);
    _cmn(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Operand *Src1RF = Srcs.src1RF(this);
  if (!Srcs.swappedOperands()) {
    _cmp(Src0R, Src1RF);
  } else {
    // The constant is the first comparison operand: use a flag-setting
    // reverse subtract, discarding the result via a FakeUse.
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, Src0R, Src1RF);
    Context.insert(InstFakeUse::create(Func, T));
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}
3129
/// Emits the flag-setting compare for an i8/i16 icmp and returns the
/// condition code under which the comparison is true.
///
/// Instead of sign/zero-extending both operands, both are shifted left so
/// that the significant bits occupy the top of the 32-bit register (ShAmt is
/// 24 for i8, 16 for i16); the comparison then behaves identically to a
/// 32-bit compare. With a constant operand, x == 0 / x != 0 is handled with
/// a single lsls, and otherwise the pre-shifted constant is materialized in
/// a register and compared against the shifted non-constant operand.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                       Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShAmt >= 0);

  if (!Srcs.hasConstOperand()) {
    // lsl Src0 into a temp, then fold Src1's shift into the cmp as a
    // register-shifted flex operand.
    Variable *Src0R = makeReg(IceType_i32);
    Operand *ShAmtF =
        legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
    _lsl(Src0R, legalizeToReg(Src0), ShAmtF);

    Variable *Src1R = legalizeToReg(Src1);
    OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
    _cmp(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    // x == 0 / x != 0: a flag-setting shift tests the significant bits.
    Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
    Variable *T = makeReg(IceType_i32);
    _lsls(T, Srcs.src0R(this), ShAmtOp);
    Context.insert(InstFakeUse::create(Func, T));
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  // Materialize the pre-shifted constant; compare it against the other
  // operand shifted via a flex operand.
  Variable *ConstR = makeReg(IceType_i32);
  _mov(ConstR,
       legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
  Operand *NonConstF = OperandARM32FlexReg::create(
      Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
      Ctx->getConstantInt32(ShAmt));

  if (Srcs.swappedOperands()) {
    _cmp(ConstR, NonConstF);
  } else {
    // cmp's first operand must be a register, so when the constant is on the
    // right we use a flag-setting reverse subtract instead.
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, ConstR, NonConstF);
    Context.insert(InstFakeUse::create(Func, T));
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}
3175
/// Dispatches an icmp to the width-specific lowering helper and returns the
/// condition code(s) under which the comparison is true. The compare
/// instructions themselves are emitted by the helpers as a side effect.
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
  // i1 compares are expected to have been handled elsewhere.
  assert(Inst->getSrc(0)->getType() != IceType_i1);
  assert(Inst->getSrc(1)->getType() != IceType_i1);

  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  const InstIcmp::ICond Condition = Inst->getCondition();
  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp tb, tc
  //   mov.C1 t, #0
  //   mov.C2 t, #1
  //   mov a, t
  // where the unsigned/sign extension is not needed for 32-bit. They also
  // have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs t, tb, tc
  //   movne t, #1
  //   mov a, t
  //
  // LLVM does:
  //   lsl tb, b, #<N>
  //   mov t, #0
  //   cmp tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit
  // signed/unsigned). For the unsigned case, for some reason it does similar
  // to GCC and does a uxtb first. It's not clear to me why that
  // special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
  switch (Src0->getType()) {
  default:
    llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
  case IceType_i8:
  case IceType_i16:
    return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
  case IceType_i32:
    return lowerInt32IcmpCond(Condition, Src0, Src1);
  case IceType_i64:
    return lowerInt64IcmpCond(Condition, Src0, Src1);
  }
}
2764 3225
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3226 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3227 Variable *Dest = Inst->getDest();
2767 3228
2768 if (isVectorType(Dest->getType())) { 3229 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3230 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3231 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3232 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3233 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4708 }
4248 return Reg; 4709 return Reg;
4249 } 4710 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4711 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4712 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4713 _movw(Reg, C);
4253 _movt(Reg, C); 4714 _movt(Reg, C);
4254 return Reg; 4715 return Reg;
4255 } else { 4716 } else {
4256 assert(isScalarFloatingType(Ty)); 4717 assert(isScalarFloatingType(Ty));
4718 uint32_t ModifiedImm;
4719 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4720 Variable *T = makeReg(Ty, RegNum);
4721 _mov(T,
4722 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4723 return T;
4724 }
4725
4726 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
4727 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
4728 // because ARM does not have a veor instruction with S registers.
4729 Variable *T = makeReg(IceType_f64, RegNum);
4730 Context.insert(InstFakeDef::create(Func, T));
4731 _veor(T, T, T);
4732 return T;
4733 }
4734
4257 // Load floats/doubles from literal pool. 4735 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4736 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4737 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4738 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4739 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4740 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4741 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4742 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4743 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4744 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4745 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5371 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5372 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5373 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5374 }
4903 5375
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5376 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5377 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5378 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5379
4908 } // end of namespace Ice 5380 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | src/IceUtils.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698