Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(181)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
// Emits a runtime divide-by-zero check for the (possibly 64-bit) divisor
// given by SrcLo/SrcHi: if the divisor is zero, execution traps (ARM division
// does not trap by itself, but NaCl requires a trap). The check is elided
// when either half is statically known to be nonzero.
void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm::report_fatal_error("Unexpected type");
  case IceType_i8:
  // i8 deliberately falls through to i16: both shift the value's significant
  // bits to the top of a 32-bit register with lsls, setting Z iff they are
  // all zero (shift amount = 32 - bit width, i.e. 24 for i8, 16 for i16).
  case IceType_i16: {
    Operand *ShAmtF =
        legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
                 Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _lsls(T, SrcLoReg, ShAmtF);
    Context.insert(InstFakeUse::create(Func, T));
  } break;
  case IceType_i32: {
    // tst r, r sets Z iff r == 0.
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *T = makeReg(IceType_i32);
    _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
    // T isn't going to be used, but we need the side-effect of setting flags
    // from this operation.
    Context.insert(InstFakeUse::create(Func, T));
  }
  }
  // Branch around the trap when the flags say "nonzero" (NE).
  InstARM32Label *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
namespace {
// NumericOperands is used during arithmetic/icmp lowering for constant folding.
// It holds the two source operands, and maintains some state as to whether one
// of them is a constant. If one of the operands is a constant, then it will be
// stored as the operation's second source, with a bit indicating whether the
// operands were swapped.
//
// The class is split into a base class with operand type-independent methods,
// and a derived, templated class, for each type of operand we want to fold
// constants for:
//
// NumericOperandsBase --> NumericOperands<ConstantFloat>
//                     --> NumericOperands<ConstantDouble>
//                     --> NumericOperands<ConstantInt32>
//
// NumericOperands<ConstantInt32> also exposes helper methods for emitting
// inverted/negated immediates.
class NumericOperandsBase {
  NumericOperandsBase() = delete;
  NumericOperandsBase(const NumericOperandsBase &) = delete;
  NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;

public:
  NumericOperandsBase(Operand *S0, Operand *S1)
      : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
        Swapped(Src0 == S1 && S0 != S1) {
    assert(Src0 != nullptr);
    assert(Src1 != nullptr);
    assert(Src0 != Src1 || S0 == S1);
  }

  // True when Src1 is a foldable constant (relocatables are excluded because
  // their value is not known until link time).
  bool hasConstOperand() const {
    return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
  }

  // True when the constant was originally the *first* source operand.
  bool swappedOperands() const { return Swapped; }

  // src0R/src1RF operate on the canonicalized (possibly swapped) order;
  // the unswapped* variants recover the original operand order.
  Variable *src0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Src0);
  }

  Variable *unswappedSrc0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src1 : Src0);
  }

  Operand *src1RF(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target, Src1);
  }

  Variable *unswappedSrc1R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src0 : Src1);
  }

  Operand *unswappedSrc1RF(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1);
  }

protected:
  Operand *const Src0;
  Operand *const Src1;
  const bool Swapped;

  static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
    return Target->legalizeToReg(Src);
  }

  static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
    return Target->legalize(Src,
                            TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
  }

private:
  // Picks the operand to keep in the first position: prefer a non-constant;
  // among two constants, prefer keeping a relocatable (non-foldable) first.
  static Operand *NonConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S0;
    if (!llvm::isa<Constant>(S1))
      return S1;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S1;
    return S0;
  }

  // Picks the operand to place in the second (constant) position; mirror
  // image of NonConstOperand so the two selections are always consistent.
  static Operand *ConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S1;
    if (!llvm::isa<Constant>(S1))
      return S0;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S0;
    return S1;
  }
};

template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  // Only valid when hasConstOperand() is true (enforced by the llvm::cast).
  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};

using FloatOperands = NumericOperands<ConstantFloat>;
using DoubleOperands = NumericOperands<ConstantDouble>;

// Int32Operands adds queries for whether the (possibly negated or inverted)
// immediate fits an ARM flexible-second-operand encoding, plus helpers that
// materialize those derived immediates.
class Int32Operands : public NumericOperands<ConstantInteger32> {
  Int32Operands() = delete;
  Int32Operands(const Int32Operands &) = delete;
  Int32Operands &operator=(const Int32Operands &) = delete;

public:
  Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}

  bool immediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
  }

  bool negatedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *negatedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   -static_cast<int32_t>(getConstantValue())));
  }

  bool invertedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *invertedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   ~static_cast<uint32_t>(getConstantValue())));
  }
};
} // end of anonymous namespace
1555
1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1557 Variable *Dest, Operand *Src0,
1558 Operand *Src1) {
1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
1563
1564 // These helper-call-involved instructions are lowered in this separate
1565 // switch. This is because we would otherwise assume that we need to
1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1567 // helper calls, and such unused/redundant instructions will fail liveness
1568 // analysis under -Om1 setting.
1569 switch (Op) {
1570 default:
1571 break;
1572 case InstArithmetic::Udiv:
1573 case InstArithmetic::Sdiv:
1574 case InstArithmetic::Urem:
1575 case InstArithmetic::Srem: {
1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1578 // register, which will hide a constant source operand. Instead, check
1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) {
1582 _trap();
1583 return;
1584 }
1585 } else {
1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this);
1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this);
1588 div0Check(IceType_i64, Src1Lo, Src1Hi);
1589 }
1590 // Technically, ARM has its own aeabi routines, but we can use the
1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1592 // the more standard __moddi3 for rem.
1593 const char *HelperName = "";
1594 switch (Op) {
1595 default:
1596 llvm::report_fatal_error("Should have only matched div ops.");
1597 break;
1598 case InstArithmetic::Udiv:
1599 HelperName = H_udiv_i64;
1600 break;
1601 case InstArithmetic::Sdiv:
1602 HelperName = H_sdiv_i64;
1603 break;
1604 case InstArithmetic::Urem:
1605 HelperName = H_urem_i64;
1606 break;
1607 case InstArithmetic::Srem:
1608 HelperName = H_srem_i64;
1609 break;
1610 }
1611 constexpr SizeT MaxSrcs = 2;
1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1613 Call->addArg(Src0);
1614 Call->addArg(Src1);
1615 lowerCall(Call);
1616 return;
1617 }
1618 }
1619
1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1622 Variable *T_Lo = makeReg(DestLo->getType());
1623 Variable *T_Hi = makeReg(DestHi->getType());
1624
1625 switch (Op) {
1626 case InstArithmetic::_num:
1627 llvm::report_fatal_error("Unknown arithmetic operator");
1628 return;
1629 case InstArithmetic::Add: {
1630 Variable *Src0LoR = SrcsLo.src0R(this);
1631 Operand *Src1LoRF = SrcsLo.src1RF(this);
1632 Variable *Src0HiR = SrcsHi.src0R(this);
1633 Operand *Src1HiRF = SrcsHi.src1RF(this);
1634 _adds(T_Lo, Src0LoR, Src1LoRF);
1635 _mov(DestLo, T_Lo);
1636 _adc(T_Hi, Src0HiR, Src1HiRF);
1637 _mov(DestHi, T_Hi);
1638 return;
1639 }
1640 case InstArithmetic::And: {
1641 Variable *Src0LoR = SrcsLo.src0R(this);
1642 Operand *Src1LoRF = SrcsLo.src1RF(this);
1643 Variable *Src0HiR = SrcsHi.src0R(this);
1644 Operand *Src1HiRF = SrcsHi.src1RF(this);
1645 _and(T_Lo, Src0LoR, Src1LoRF);
1646 _mov(DestLo, T_Lo);
1647 _and(T_Hi, Src0HiR, Src1HiRF);
1648 _mov(DestHi, T_Hi);
1649 return;
1650 }
1651 case InstArithmetic::Or: {
1652 Variable *Src0LoR = SrcsLo.src0R(this);
1653 Operand *Src1LoRF = SrcsLo.src1RF(this);
1654 Variable *Src0HiR = SrcsHi.src0R(this);
1655 Operand *Src1HiRF = SrcsHi.src1RF(this);
1656 _orr(T_Lo, Src0LoR, Src1LoRF);
1657 _mov(DestLo, T_Lo);
1658 _orr(T_Hi, Src0HiR, Src1HiRF);
1659 _mov(DestHi, T_Hi);
1660 return;
1661 }
1662 case InstArithmetic::Xor: {
1663 Variable *Src0LoR = SrcsLo.src0R(this);
1664 Operand *Src1LoRF = SrcsLo.src1RF(this);
1665 Variable *Src0HiR = SrcsHi.src0R(this);
1666 Operand *Src1HiRF = SrcsHi.src1RF(this);
1667 _eor(T_Lo, Src0LoR, Src1LoRF);
1668 _mov(DestLo, T_Lo);
1669 _eor(T_Hi, Src0HiR, Src1HiRF);
1670 _mov(DestHi, T_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Sub: {
1674 Variable *Src0LoR = SrcsLo.src0R(this);
1675 Operand *Src1LoRF = SrcsLo.src1RF(this);
1676 Variable *Src0HiR = SrcsHi.src0R(this);
1677 Operand *Src1HiRF = SrcsHi.src1RF(this);
1678 if (SrcsLo.swappedOperands()) {
1679 _rsbs(T_Lo, Src0LoR, Src1LoRF);
1680 _mov(DestLo, T_Lo);
1681 _rsc(T_Hi, Src0HiR, Src1HiRF);
1682 _mov(DestHi, T_Hi);
1683 } else {
1684 _subs(T_Lo, Src0LoR, Src1LoRF);
1685 _mov(DestLo, T_Lo);
1686 _sbc(T_Hi, Src0HiR, Src1HiRF);
1687 _mov(DestHi, T_Hi);
1688 }
1689 return;
1690 }
1691 case InstArithmetic::Mul: {
1692 // GCC 4.8 does:
1693 // a=b*c ==>
1694 // t_acc =(mul) (b.lo * c.hi)
1695 // t_acc =(mla) (c.lo * b.hi) + t_acc
1696 // t.hi,t.lo =(umull) b.lo * c.lo
1697 // t.hi += t_acc
1698 // a.lo = t.lo
1699 // a.hi = t.hi
1700 //
1701 // LLVM does:
1702 // t.hi,t.lo =(umull) b.lo * c.lo
1703 // t.hi =(mla) (b.lo * c.hi) + t.hi
1704 // t.hi =(mla) (b.hi * c.lo) + t.hi
1705 // a.lo = t.lo
1706 // a.hi = t.hi
1707 //
1708 // LLVM's lowering has fewer instructions, but more register pressure:
1709 // t.lo is live from beginning to end, while GCC delays the two-dest
1710 // instruction till the end, and kills c.hi immediately.
1711 Variable *T_Acc = makeReg(IceType_i32);
1712 Variable *T_Acc1 = makeReg(IceType_i32);
1713 Variable *T_Hi1 = makeReg(IceType_i32);
1714 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1715 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1716 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1717 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
1718 _mul(T_Acc, Src0RLo, Src1RHi);
1719 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1720 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1721 _add(T_Hi, T_Hi1, T_Acc1);
1722 _mov(DestLo, T_Lo);
1723 _mov(DestHi, T_Hi);
1724 return;
1725 }
1726 case InstArithmetic::Shl: {
1727 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1728 Variable *Src0RLo = SrcsLo.src0R(this);
1729 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1730 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1731 if (ShAmtImm == 0) {
1732 _mov(DestLo, Src0RLo);
1733 _mov(DestHi, SrcsHi.src0R(this));
1734 return;
1735 }
1736
1737 if (ShAmtImm >= 32) {
1738 if (ShAmtImm == 32) {
1739 _mov(DestHi, Src0RLo);
1740 } else {
1741 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1742 Legal_Reg | Legal_Flex);
1743 _lsl(T_Hi, Src0RLo, ShAmtOp);
1744 _mov(DestHi, T_Hi);
1745 }
1746
1747 Operand *_0 =
1748 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1749 _mov(T_Lo, _0);
1750 _mov(DestLo, T_Lo);
1751 return;
1752 }
1753
1754 Variable *Src0RHi = SrcsHi.src0R(this);
1755 Operand *ShAmtOp =
1756 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1757 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1758 Legal_Reg | Legal_Flex);
1759 _lsl(T_Hi, Src0RHi, ShAmtOp);
1760 _orr(T_Hi, T_Hi,
1761 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1762 OperandARM32::LSR, ComplShAmtOp));
1763 _mov(DestHi, T_Hi);
1764
1765 _lsl(T_Lo, Src0RLo, ShAmtOp);
1766 _mov(DestLo, T_Lo);
1767 return;
1768 }
1769
1770 // a=b<<c ==>
1771 // pnacl-llc does:
1772 // mov t_b.lo, b.lo
1773 // mov t_b.hi, b.hi
1774 // mov t_c.lo, c.lo
1775 // rsb T0, t_c.lo, #32
1776 // lsr T1, t_b.lo, T0
1777 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1778 // sub T2, t_c.lo, #32
1779 // cmp T2, #0
1780 // lslge t_a.hi, t_b.lo, T2
1781 // lsl t_a.lo, t_b.lo, t_c.lo
1782 // mov a.lo, t_a.lo
1783 // mov a.hi, t_a.hi
1784 //
1785 // GCC 4.8 does:
1786 // sub t_c1, c.lo, #32
1787 // lsl t_hi, b.hi, c.lo
1788 // orr t_hi, t_hi, b.lo, lsl t_c1
1789 // rsb t_c2, c.lo, #32
1790 // orr t_hi, t_hi, b.lo, lsr t_c2
1791 // lsl t_lo, b.lo, c.lo
1792 // a.lo = t_lo
1793 // a.hi = t_hi
1794 //
1795 // These are incompatible, therefore we mimic pnacl-llc.
1796 // Can be strength-reduced for constant-shifts, but we don't do that for
1797 // now.
1798 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1799 // ARM, shifts only take the lower 8 bits of the shift register, and
1800 // saturate to the range 0-32, so the negative value will saturate to 32.
1801 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1802 Operand *_0 =
1803 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1804 Variable *T0 = makeReg(IceType_i32);
1805 Variable *T1 = makeReg(IceType_i32);
1806 Variable *T2 = makeReg(IceType_i32);
1807 Variable *TA_Hi = makeReg(IceType_i32);
1808 Variable *TA_Lo = makeReg(IceType_i32);
1809 Variable *Src0RLo = SrcsLo.src0R(this);
1810 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1811 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1812 _rsb(T0, Src1RLo, _32);
1813 _lsr(T1, Src0RLo, T0);
1814 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1815 OperandARM32::LSL, Src1RLo));
1816 _sub(T2, Src1RLo, _32);
1817 _cmp(T2, _0);
1818 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1819 _set_dest_redefined();
1820 _lsl(TA_Lo, Src0RLo, Src1RLo);
1821 _mov(DestLo, TA_Lo);
1822 _mov(DestHi, TA_Hi);
1823 return;
1824 }
1825 case InstArithmetic::Lshr:
1826 case InstArithmetic::Ashr: {
1827 const bool ASR = Op == InstArithmetic::Ashr;
1828 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1829 Variable *Src0RHi = SrcsHi.src0R(this);
1830 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1831 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1832 if (ShAmtImm == 0) {
1833 _mov(DestHi, Src0RHi);
1834 _mov(DestLo, SrcsLo.src0R(this));
1835 return;
1836 }
1837
1838 if (ShAmtImm >= 32) {
1839 if (ShAmtImm == 32) {
1840 _mov(DestLo, Src0RHi);
1841 } else {
1842 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1843 Legal_Reg | Legal_Flex);
1844 if (ASR) {
1845 _asr(T_Lo, Src0RHi, ShAmtOp);
1846 } else {
1847 _lsr(T_Lo, Src0RHi, ShAmtOp);
1848 }
1849 _mov(DestLo, T_Lo);
1850 }
1851
1852 if (ASR) {
1853 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1854 Legal_Reg | Legal_Flex);
1855 _asr(T_Hi, Src0RHi, _31);
1856 } else {
1857 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1858 Legal_Reg | Legal_Flex);
1859 _mov(T_Hi, _0);
1860 }
1861 _mov(DestHi, T_Hi);
1862 return;
1863 }
1864
1865 Variable *Src0RLo = SrcsLo.src0R(this);
1866 Operand *ShAmtOp =
1867 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1868 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1869 Legal_Reg | Legal_Flex);
1870 _lsr(T_Lo, Src0RLo, ShAmtOp);
1871 _orr(T_Lo, T_Lo,
1872 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1873 OperandARM32::LSL, ComplShAmtOp));
1874 _mov(DestLo, T_Lo);
1875
1876 if (ASR) {
1877 _asr(T_Hi, Src0RHi, ShAmtOp);
1878 } else {
1879 _lsr(T_Hi, Src0RHi, ShAmtOp);
1880 }
1881 _mov(DestHi, T_Hi);
1882 return;
1883 }
1884
1885 // a=b>>c
1886 // pnacl-llc does:
1887 // mov t_b.lo, b.lo
1888 // mov t_b.hi, b.hi
1889 // mov t_c.lo, c.lo
1890 // lsr T0, t_b.lo, t_c.lo
1891 // rsb T1, t_c.lo, #32
1892 // orr t_a.lo, T0, t_b.hi, lsl T1
1893 // sub T2, t_c.lo, #32
1894 // cmp T2, #0
1895 // [al]srge t_a.lo, t_b.hi, T2
1896 // [al]sr t_a.hi, t_b.hi, t_c.lo
1897 // mov a.lo, t_a.lo
1898 // mov a.hi, t_a.hi
1899 //
1900 // GCC 4.8 does (lsr):
1901 // rsb t_c1, c.lo, #32
1902 // lsr t_lo, b.lo, c.lo
1903 // orr t_lo, t_lo, b.hi, lsl t_c1
1904 // sub t_c2, c.lo, #32
1905 // orr t_lo, t_lo, b.hi, lsr t_c2
1906 // lsr t_hi, b.hi, c.lo
1907 // mov a.lo, t_lo
1908 // mov a.hi, t_hi
1909 //
1910 // These are incompatible, therefore we mimic pnacl-llc.
1911 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1912 Operand *_0 =
1913 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1914 Variable *T0 = makeReg(IceType_i32);
1915 Variable *T1 = makeReg(IceType_i32);
1916 Variable *T2 = makeReg(IceType_i32);
1917 Variable *TA_Lo = makeReg(IceType_i32);
1918 Variable *TA_Hi = makeReg(IceType_i32);
1919 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1920 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1921 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1922 _lsr(T0, Src0RLo, Src1RLo);
1923 _rsb(T1, Src1RLo, _32);
1924 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1925 OperandARM32::LSL, T1));
1926 _sub(T2, Src1RLo, _32);
1927 _cmp(T2, _0);
1928 if (ASR) {
1929 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1930 _set_dest_redefined();
1931 _asr(TA_Hi, Src0RHi, Src1RLo);
1932 } else {
1933 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1934 _set_dest_redefined();
1935 _lsr(TA_Hi, Src0RHi, Src1RLo);
1936 }
1937 _mov(DestLo, TA_Lo);
1938 _mov(DestHi, TA_Hi);
1939 return;
1940 }
1941 case InstArithmetic::Fadd:
1942 case InstArithmetic::Fsub:
1943 case InstArithmetic::Fmul:
1944 case InstArithmetic::Fdiv:
1945 case InstArithmetic::Frem:
1946 llvm::report_fatal_error("FP instruction with i64 type");
1947 return;
1948 case InstArithmetic::Udiv:
1949 case InstArithmetic::Sdiv:
1950 case InstArithmetic::Urem:
1951 case InstArithmetic::Srem:
1952 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1953 "should have already been handled before");
1954 return;
1955 }
1956 }
1957
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1958 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1959 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1960 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1961 lowerInt1Arithmetic(Inst);
1411 return; 1962 return;
1412 } 1963 }
1413 1964
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1965 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1966 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1967 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1968 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to 1969 return;
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with 1970 }
1427 // helper calls, and such unused/redundant instructions will fail liveness 1971
1428 // analysis under -Om1 setting. 1972 if (isVectorType(Dest->getType())) {
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return;
1689 } else if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1973 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1974 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1975 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1976 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1977 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1978 return;
1696 } 1979 }
1980
1697 // Dest->getType() is a non-i64 scalar. 1981 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1982 Variable *T = makeReg(Dest->getType());
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1983
1984 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1985 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1986 // difficult to determine (constant may be moved to a register).
1987 // * Handle floating point arithmetic separately: they require Src1 to be
1988 // legalized to a register.
1703 switch (Inst->getOp()) { 1989 switch (Inst->getOp()) {
1704 default: 1990 default:
1705 break; 1991 break;
1706 case InstArithmetic::Udiv: { 1992 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1993 constexpr bool NotRemainder = false;
1994 Variable *Src0R = legalizeToReg(Src0);
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1995 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1996 H_udiv_i32, NotRemainder);
1710 return; 1997 return;
1711 } 1998 }
1712 case InstArithmetic::Sdiv: { 1999 case InstArithmetic::Sdiv: {
1713 constexpr bool NotRemainder = false; 2000 constexpr bool NotRemainder = false;
2001 Variable *Src0R = legalizeToReg(Src0);
1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 2002 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1715 H_sdiv_i32, NotRemainder); 2003 H_sdiv_i32, NotRemainder);
1716 return; 2004 return;
1717 } 2005 }
1718 case InstArithmetic::Urem: { 2006 case InstArithmetic::Urem: {
1719 constexpr bool IsRemainder = true; 2007 constexpr bool IsRemainder = true;
2008 Variable *Src0R = legalizeToReg(Src0);
1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 2009 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1721 H_urem_i32, IsRemainder); 2010 H_urem_i32, IsRemainder);
1722 return; 2011 return;
1723 } 2012 }
1724 case InstArithmetic::Srem: { 2013 case InstArithmetic::Srem: {
1725 constexpr bool IsRemainder = true; 2014 constexpr bool IsRemainder = true;
2015 Variable *Src0R = legalizeToReg(Src0);
1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 2016 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1727 H_srem_i32, IsRemainder); 2017 H_srem_i32, IsRemainder);
1728 return; 2018 return;
1729 } 2019 }
1730 case InstArithmetic::Frem: { 2020 case InstArithmetic::Frem: {
1731 const SizeT MaxSrcs = 2; 2021 constexpr SizeT MaxSrcs = 2;
2022 Variable *Src0R = legalizeToReg(Src0);
1732 Type Ty = Dest->getType(); 2023 Type Ty = Dest->getType();
1733 InstCall *Call = makeHelperCall( 2024 InstCall *Call = makeHelperCall(
1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2025 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1735 Call->addArg(Src0R); 2026 Call->addArg(Src0R);
1736 Call->addArg(Src1); 2027 Call->addArg(Src1);
1737 lowerCall(Call); 2028 lowerCall(Call);
1738 return; 2029 return;
1739 } 2030 }
1740 }
1741
1742 // Handle floating point arithmetic separately: they require Src1 to be
1743 // legalized to a register.
1744 switch (Inst->getOp()) {
1745 default:
1746 break;
1747 case InstArithmetic::Fadd: { 2031 case InstArithmetic::Fadd: {
2032 Variable *Src0R = legalizeToReg(Src0);
1748 Variable *Src1R = legalizeToReg(Src1); 2033 Variable *Src1R = legalizeToReg(Src1);
1749 _vadd(T, Src0R, Src1R); 2034 _vadd(T, Src0R, Src1R);
1750 _mov(Dest, T); 2035 _mov(Dest, T);
1751 return; 2036 return;
1752 } 2037 }
1753 case InstArithmetic::Fsub: { 2038 case InstArithmetic::Fsub: {
2039 Variable *Src0R = legalizeToReg(Src0);
1754 Variable *Src1R = legalizeToReg(Src1); 2040 Variable *Src1R = legalizeToReg(Src1);
1755 _vsub(T, Src0R, Src1R); 2041 _vsub(T, Src0R, Src1R);
1756 _mov(Dest, T); 2042 _mov(Dest, T);
1757 return; 2043 return;
1758 } 2044 }
1759 case InstArithmetic::Fmul: { 2045 case InstArithmetic::Fmul: {
2046 Variable *Src0R = legalizeToReg(Src0);
1760 Variable *Src1R = legalizeToReg(Src1); 2047 Variable *Src1R = legalizeToReg(Src1);
1761 _vmul(T, Src0R, Src1R); 2048 _vmul(T, Src0R, Src1R);
1762 _mov(Dest, T); 2049 _mov(Dest, T);
1763 return; 2050 return;
1764 } 2051 }
1765 case InstArithmetic::Fdiv: { 2052 case InstArithmetic::Fdiv: {
2053 Variable *Src0R = legalizeToReg(Src0);
1766 Variable *Src1R = legalizeToReg(Src1); 2054 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 2055 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 2056 _mov(Dest, T);
1769 return; 2057 return;
1770 } 2058 }
1771 } 2059 }
1772 2060
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2061 // Handle everything else here.
2062 Int32Operands Srcs(Src0, Src1);
1774 switch (Inst->getOp()) { 2063 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 2064 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 2065 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 2066 return;
1778 case InstArithmetic::Add: 2067 case InstArithmetic::Add: {
2068 if (Srcs.hasConstOperand()) {
2069 if (!Srcs.immediateIsFlexEncodable() &&
2070 Srcs.negatedImmediateIsFlexEncodable()) {
2071 Variable *Src0R = Srcs.src0R(this);
2072 Operand *Src1F = Srcs.negatedSrc1F(this);
2073 if (!Srcs.swappedOperands()) {
2074 _sub(T, Src0R, Src1F);
2075 } else {
2076 _rsb(T, Src0R, Src1F);
2077 }
2078 _mov(Dest, T);
2079 return;
2080 }
2081 }
2082 Variable *Src0R = Srcs.src0R(this);
2083 Operand *Src1RF = Srcs.src1RF(this);
1779 _add(T, Src0R, Src1RF); 2084 _add(T, Src0R, Src1RF);
1780 _mov(Dest, T); 2085 _mov(Dest, T);
1781 return; 2086 return;
1782 case InstArithmetic::And: 2087 }
2088 case InstArithmetic::And: {
2089 if (Srcs.hasConstOperand()) {
2090 if (!Srcs.immediateIsFlexEncodable() &&
2091 Srcs.invertedImmediateIsFlexEncodable()) {
2092 Variable *Src0R = Srcs.src0R(this);
2093 Operand *Src1F = Srcs.invertedSrc1F(this);
2094 _bic(T, Src0R, Src1F);
2095 _mov(Dest, T);
2096 return;
2097 }
2098 }
2099 Variable *Src0R = Srcs.src0R(this);
2100 Operand *Src1RF = Srcs.src1RF(this);
1783 _and(T, Src0R, Src1RF); 2101 _and(T, Src0R, Src1RF);
1784 _mov(Dest, T); 2102 _mov(Dest, T);
1785 return; 2103 return;
1786 case InstArithmetic::Or: 2104 }
2105 case InstArithmetic::Or: {
2106 Variable *Src0R = Srcs.src0R(this);
2107 Operand *Src1RF = Srcs.src1RF(this);
1787 _orr(T, Src0R, Src1RF); 2108 _orr(T, Src0R, Src1RF);
1788 _mov(Dest, T); 2109 _mov(Dest, T);
1789 return; 2110 return;
1790 case InstArithmetic::Xor: 2111 }
2112 case InstArithmetic::Xor: {
2113 Variable *Src0R = Srcs.src0R(this);
2114 Operand *Src1RF = Srcs.src1RF(this);
1791 _eor(T, Src0R, Src1RF); 2115 _eor(T, Src0R, Src1RF);
1792 _mov(Dest, T); 2116 _mov(Dest, T);
1793 return; 2117 return;
1794 case InstArithmetic::Sub: 2118 }
1795 _sub(T, Src0R, Src1RF); 2119 case InstArithmetic::Sub: {
2120 if (Srcs.hasConstOperand()) {
2121 Variable *Src0R = Srcs.src0R(this);
2122 if (Srcs.immediateIsFlexEncodable()) {
2123 Operand *Src1RF = Srcs.src1RF(this);
2124 if (Srcs.swappedOperands()) {
2125 _rsb(T, Src0R, Src1RF);
2126 } else {
2127 _sub(T, Src0R, Src1RF);
2128 }
2129 _mov(Dest, T);
2130 return;
2131 }
2132 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2133 Operand *Src1F = Srcs.negatedSrc1F(this);
2134 _add(T, Src0R, Src1F);
2135 _mov(Dest, T);
2136 return;
2137 }
2138 }
2139 Variable *Src0R = Srcs.unswappedSrc0R(this);
2140 Variable *Src1R = Srcs.unswappedSrc1R(this);
2141 _sub(T, Src0R, Src1R);
1796 _mov(Dest, T); 2142 _mov(Dest, T);
1797 return; 2143 return;
2144 }
1798 case InstArithmetic::Mul: { 2145 case InstArithmetic::Mul: {
1799 Variable *Src1R = legalizeToReg(Src1RF); 2146 Variable *Src0R = Srcs.unswappedSrc0R(this);
2147 Variable *Src1R = Srcs.unswappedSrc1R(this);
1800 _mul(T, Src0R, Src1R); 2148 _mul(T, Src0R, Src1R);
1801 _mov(Dest, T); 2149 _mov(Dest, T);
1802 return; 2150 return;
1803 } 2151 }
1804 case InstArithmetic::Shl: 2152 case InstArithmetic::Shl: {
1805 _lsl(T, Src0R, Src1RF); 2153 Variable *Src0R = Srcs.unswappedSrc0R(this);
2154 Operand *Src1R = Srcs.unswappedSrc1RF(this);
2155 _lsl(T, Src0R, Src1R);
1806 _mov(Dest, T); 2156 _mov(Dest, T);
1807 return; 2157 return;
1808 case InstArithmetic::Lshr: 2158 }
2159 case InstArithmetic::Lshr: {
2160 Variable *Src0R = Srcs.unswappedSrc0R(this);
1809 if (Dest->getType() != IceType_i32) { 2161 if (Dest->getType() != IceType_i32) {
1810 _uxt(Src0R, Src0R); 2162 _uxt(Src0R, Src0R);
1811 } 2163 }
1812 _lsr(T, Src0R, Src1RF); 2164 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this));
1813 _mov(Dest, T); 2165 _mov(Dest, T);
1814 return; 2166 return;
1815 case InstArithmetic::Ashr: 2167 }
2168 case InstArithmetic::Ashr: {
2169 Variable *Src0R = Srcs.unswappedSrc0R(this);
1816 if (Dest->getType() != IceType_i32) { 2170 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 2171 _sxt(Src0R, Src0R);
1818 } 2172 }
1819 _asr(T, Src0R, Src1RF); 2173 _asr(T, Src0R, Srcs.unswappedSrc1RF(this));
1820 _mov(Dest, T); 2174 _mov(Dest, T);
1821 return; 2175 return;
2176 }
1822 case InstArithmetic::Udiv: 2177 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 2178 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 2179 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 2180 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 2181 llvm::report_fatal_error(
2182 "Integer div/rem should have been handled earlier.");
1827 return; 2183 return;
1828 case InstArithmetic::Fadd: 2184 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 2185 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 2186 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 2187 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 2188 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 2189 llvm::report_fatal_error(
2190 "Floating point arith should have been handled earlier.");
1834 return; 2191 return;
1835 } 2192 }
1836 } 2193 }
1837 2194
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { 2195 void TargetARM32::lowerAssign(const InstAssign *Inst) {
1839 Variable *Dest = Inst->getDest(); 2196 Variable *Dest = Inst->getDest();
1840 Operand *Src0 = Inst->getSrc(0); 2197 Operand *Src0 = Inst->getSrc(0);
1841 assert(Dest->getType() == Src0->getType()); 2198 assert(Dest->getType() == Src0->getType());
1842 if (Dest->getType() == IceType_i64) { 2199 if (Dest->getType() == IceType_i64) {
1843 Src0 = legalizeUndef(Src0); 2200 Src0 = legalizeUndef(Src0);
2201
2202 Variable *T_Lo = makeReg(IceType_i32);
2203 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2204 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1848 Variable *T_Lo = makeReg(IceType_i32);
1849 Variable *T_Hi = makeReg(IceType_i32);
1850
1851 _mov(T_Lo, Src0Lo); 2205 _mov(T_Lo, Src0Lo);
1852 _mov(DestLo, T_Lo); 2206 _mov(DestLo, T_Lo);
2207
2208 Variable *T_Hi = makeReg(IceType_i32);
2209 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2210 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1853 _mov(T_Hi, Src0Hi); 2211 _mov(T_Hi, Src0Hi);
1854 _mov(DestHi, T_Hi); 2212 _mov(DestHi, T_Hi);
2213
2214 return;
2215 }
2216
2217 Operand *NewSrc;
2218 if (Dest->hasReg()) {
2219 // If Dest already has a physical register, then legalize the Src operand
2220 // into a Variable with the same register assignment. This especially
2221 // helps allow the use of Flex operands.
2222 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1855 } else { 2223 } else {
1856 Operand *NewSrc; 2224 // Dest could be a stack operand. Since we could potentially need to do a
1857 if (Dest->hasReg()) { 2225 // Store (and store can only have Register operands), legalize this to a
1858 // If Dest already has a physical register, then legalize the Src operand 2226 // register.
1859 // into a Variable with the same register assignment. This especially 2227 NewSrc = legalize(Src0, Legal_Reg);
1860 // helps allow the use of Flex operands.
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1862 } else {
1863 // Dest could be a stack operand. Since we could potentially need to do a
1864 // Store (and store can only have Register operands), legalize this to a
1865 // register.
1866 NewSrc = legalize(Src0, Legal_Reg);
1867 }
1868 if (isVectorType(Dest->getType())) {
1869 Variable *SrcR = legalizeToReg(NewSrc);
1870 _mov(Dest, SrcR);
1871 } else if (isFloatingType(Dest->getType())) {
1872 Variable *SrcR = legalizeToReg(NewSrc);
1873 _mov(Dest, SrcR);
1874 } else {
1875 _mov(Dest, NewSrc);
1876 }
1877 } 2228 }
2229
2230 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
2231 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
2232 }
2233 _mov(Dest, NewSrc);
1878 } 2234 }
1879 2235
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2236 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2237 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2238 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2239 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2240 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2241
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2242 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2243
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
2573 struct { 2929 struct {
2574 CondARM32::Cond CC0; 2930 CondARM32::Cond CC0;
2575 CondARM32::Cond CC1; 2931 CondARM32::Cond CC1;
2576 } TableFcmp[] = { 2932 } TableFcmp[] = {
2577 #define X(val, CC0, CC1) \ 2933 #define X(val, CC0, CC1) \
2578 { CondARM32::CC0, CondARM32::CC1 } \ 2934 { CondARM32::CC0, CondARM32::CC1 } \
2579 , 2935 ,
2580 FCMPARM32_TABLE 2936 FCMPARM32_TABLE
2581 #undef X 2937 #undef X
2582 }; 2938 };
2939
2940 bool isFloatingPointZero(Operand *Src) {
2941 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2942 return Utils::isPositiveZero(F32->getValue());
2943 }
2944
2945 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2946 return Utils::isPositiveZero(F64->getValue());
2947 }
2948
2949 return false;
2950 }
2583 } // end of anonymous namespace 2951 } // end of anonymous namespace
2584 2952
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 2953 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
2586 InstFcmp::FCond Condition = Instr->getCondition(); 2954 InstFcmp::FCond Condition = Instr->getCondition();
2587 switch (Condition) { 2955 switch (Condition) {
2588 case InstFcmp::False: 2956 case InstFcmp::False:
2589 return CondWhenTrue(CondARM32::kNone); 2957 return CondWhenTrue(CondARM32::kNone);
2590 case InstFcmp::True: 2958 case InstFcmp::True:
2591 return CondWhenTrue(CondARM32::AL); 2959 return CondWhenTrue(CondARM32::AL);
2592 break; 2960 break;
2593 default: { 2961 default: {
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); 2962 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); 2963 Operand *Src1 = Instr->getSrc(1);
2596 _vcmp(Src0R, Src1R); 2964 if (isFloatingPointZero(Src1)) {
2965 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
2966 } else {
2967 _vcmp(Src0R, legalizeToReg(Src1));
2968 }
2597 _vmrs(); 2969 _vmrs();
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); 2970 assert(Condition < llvm::array_lengthof(TableFcmp));
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); 2971 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
2600 } 2972 }
2601 } 2973 }
2602 } 2974 }
2603 2975
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2976 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2977 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2978 if (isVectorType(Dest->getType())) {
(...skipping 28 matching lines...) Expand all
2635 _mov(T, _1, Cond.WhenTrue0); 3007 _mov(T, _1, Cond.WhenTrue0);
2636 } 3008 }
2637 3009
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 3010 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 3011 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 3012 }
2641 3013
2642 _mov(Dest, T); 3014 _mov(Dest, T);
2643 } 3015 }
2644 3016
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { 3017 TargetARM32::CondWhenTrue
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); 3018 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); 3019 Operand *Src1) {
3020 size_t Index = static_cast<size_t>(Condition);
3021 assert(Index < llvm::array_lengthof(TableIcmp64));
2648 3022
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 3023 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 3024 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
3025 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
3026 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
3027
3028 if (SrcsLo.hasConstOperand()) {
3029 const uint32_t ValueLo = SrcsLo.getConstantValue();
3030 const uint32_t ValueHi = SrcsHi.getConstantValue();
3031 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
3032 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
3033 Value == 0) {
3034 Variable *T = makeReg(IceType_i32);
3035 Variable *Src0LoR = SrcsLo.src0R(this);
3036 Variable *Src0HiR = SrcsHi.src0R(this);
3037 _orrs(T, Src0LoR, Src0HiR);
3038 Context.insert(InstFakeUse::create(Func, T));
3039 return CondWhenTrue(TableIcmp64[Index].C1);
3040 }
3041
3042 Variable *Src0RLo = SrcsLo.src0R(this);
3043 Variable *Src0RHi = SrcsHi.src0R(this);
3044 Operand *Src1RFLo = SrcsLo.src1RF(this);
3045 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
3046
3047 const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands();
3048
3049 if (UseRsb) {
3050 if (TableIcmp64[Index].IsSigned) {
3051 Variable *T = makeReg(IceType_i32);
3052 _rsbs(T, Src0RLo, Src1RFLo);
3053 Context.insert(InstFakeUse::create(Func, T));
3054
3055 T = makeReg(IceType_i32);
3056 _rscs(T, Src0RHi, Src1RFHi);
3057 // We need to add a FakeUse here because liveness gets mad at us (Def
3058 // without Use.) Note that flag-setting instructions are considered to
3059 // have side effects and, therefore, are not DCE'ed.
3060 Context.insert(InstFakeUse::create(Func, T));
3061 } else {
3062 Variable *T = makeReg(IceType_i32);
3063 _rsbs(T, Src0RHi, Src1RFHi);
3064 Context.insert(InstFakeUse::create(Func, T));
3065
3066 T = makeReg(IceType_i32);
3067 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
3068 Context.insert(InstFakeUse::create(Func, T));
3069 }
3070 } else {
3071 if (TableIcmp64[Index].IsSigned) {
3072 _cmp(Src0RLo, Src1RFLo);
3073 Variable *T = makeReg(IceType_i32);
3074 _sbcs(T, Src0RHi, Src1RFHi);
3075 Context.insert(InstFakeUse::create(Func, T));
3076 } else {
3077 _cmp(Src0RHi, Src1RFHi);
3078 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
3079 }
3080 }
3081
3082 return CondWhenTrue(TableIcmp64[Index].C1);
3083 }
3084
3085 Variable *Src0RLo, *Src0RHi;
3086 Operand *Src1RFLo, *Src1RFHi;
3087 if (TableIcmp64[Index].Swapped) {
3088 Src0RLo = legalizeToReg(loOperand(Src1));
3089 Src0RHi = legalizeToReg(hiOperand(Src1));
3090 Src1RFLo = legalizeToReg(loOperand(Src0));
3091 Src1RFHi = legalizeToReg(hiOperand(Src0));
3092 } else {
3093 Src0RLo = legalizeToReg(loOperand(Src0));
3094 Src0RHi = legalizeToReg(hiOperand(Src0));
3095 Src1RFLo = legalizeToReg(loOperand(Src1));
3096 Src1RFHi = legalizeToReg(hiOperand(Src1));
3097 }
2651 3098
2652 // a=icmp cond, b, c ==> 3099 // a=icmp cond, b, c ==>
2653 // GCC does: 3100 // GCC does:
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo 3101 // cmp b.hi, c.hi or cmp b.lo, c.lo
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 3102 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2656 // mov.<C1> t, #1 mov.<C1> t, #1 3103 // mov.<C1> t, #1 mov.<C1> t, #1
2657 // mov.<C2> t, #0 mov.<C2> t, #0 3104 // mov.<C2> t, #0 mov.<C2> t, #0
2658 // mov a, t mov a, t 3105 // mov a, t mov a, t
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 3106 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2660 // is used for signed compares. In some cases, b and c need to be swapped as 3107 // is used for signed compares. In some cases, b and c need to be swapped as
(...skipping 10 matching lines...) Expand all
2671 // that's nice in that it's just as short but has fewer dependencies for 3118 // that's nice in that it's just as short but has fewer dependencies for
2672 // better ILP at the cost of more registers. 3119 // better ILP at the cost of more registers.
2673 // 3120 //
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 3121 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one 3122 // unconditional mov #0, two cmps, two conditional mov #1, and one
2676 // conditional reg mov. That has few dependencies for good ILP, but is a 3123 // conditional reg mov. That has few dependencies for good ILP, but is a
2677 // longer sequence. 3124 // longer sequence.
2678 // 3125 //
2679 // So, we are going with the GCC version since it's usually better (except 3126 // So, we are going with the GCC version since it's usually better (except
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 3127 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
3128 if (TableIcmp64[Index].IsSigned) {
3129 Variable *ScratchReg = makeReg(IceType_i32);
3130 _cmp(Src0RLo, Src1RFLo);
3131 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
3132 // ScratchReg isn't going to be used, but we need the side-effect of
3133 // setting flags from this operation.
3134 Context.insert(InstFakeUse::create(Func, ScratchReg));
3135 } else {
3136 _cmp(Src0RHi, Src1RFHi);
3137 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
3138 }
3139 return CondWhenTrue(TableIcmp64[Index].C1);
3140 }
2681 3141
2682 if (Src0->getType() == IceType_i64) { 3142 TargetARM32::CondWhenTrue
2683 InstIcmp::ICond Conditon = Inst->getCondition(); 3143 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2684 size_t Index = static_cast<size_t>(Conditon); 3144 Operand *Src1) {
2685 assert(Index < llvm::array_lengthof(TableIcmp64)); 3145 Int32Operands Srcs(Src0, Src1);
2686 Variable *Src0Lo, *Src0Hi; 3146 if (!Srcs.hasConstOperand()) {
2687 Operand *Src1LoRF, *Src1HiRF; 3147
2688 if (TableIcmp64[Index].Swapped) { 3148 Variable *Src0R = Srcs.src0R(this);
2689 Src0Lo = legalizeToReg(loOperand(Src1)); 3149 Operand *Src1RF = Srcs.src1RF(this);
2690 Src0Hi = legalizeToReg(hiOperand(Src1)); 3150 _cmp(Src0R, Src1RF);
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 3151 return CondWhenTrue(getIcmp32Mapping(Condition));
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2693 } else {
2694 Src0Lo = legalizeToReg(loOperand(Src0));
2695 Src0Hi = legalizeToReg(hiOperand(Src0));
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2698 }
2699 if (TableIcmp64[Index].IsSigned) {
2700 Variable *ScratchReg = makeReg(IceType_i32);
2701 _cmp(Src0Lo, Src1LoRF);
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2703 // ScratchReg isn't going to be used, but we need the side-effect of
2704 // setting flags from this operation.
2705 Context.insert(InstFakeUse::create(Func, ScratchReg));
2706 } else {
2707 _cmp(Src0Hi, Src1HiRF);
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2709 }
2710 return CondWhenTrue(TableIcmp64[Index].C1);
2711 } 3152 }
2712 3153
3154 Variable *Src0R = Srcs.src0R(this);
3155 const int32_t Value = Srcs.getConstantValue();
3156 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
3157 _tst(Src0R, Src0R);
3158 return CondWhenTrue(getIcmp32Mapping(Condition));
3159 }
3160
3161 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
3162 Srcs.negatedImmediateIsFlexEncodable()) {
3163 Operand *Src1F = Srcs.negatedSrc1F(this);
3164 _cmn(Src0R, Src1F);
3165 return CondWhenTrue(getIcmp32Mapping(Condition));
3166 }
3167
3168 Operand *Src1RF = Srcs.src1RF(this);
3169 if (!Srcs.swappedOperands()) {
3170 _cmp(Src0R, Src1RF);
3171 } else {
3172 Variable *T = makeReg(IceType_i32);
3173 _rsbs(T, Src0R, Src1RF);
3174 Context.insert(InstFakeUse::create(Func, T));
3175 }
3176 return CondWhenTrue(getIcmp32Mapping(Condition));
3177 }
3178
3179 TargetARM32::CondWhenTrue
3180 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
3181 Operand *Src1) {
3182 Int32Operands Srcs(Src0, Src1);
3183 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
3184 assert(ShAmt >= 0);
3185
3186 if (!Srcs.hasConstOperand()) {
3187 Variable *Src0R = makeReg(IceType_i32);
3188 Operand *ShAmtF =
3189 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
3190 _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
3191
3192 Variable *Src1R = legalizeToReg(Src1);
3193 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
3194 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
3195 _cmp(Src0R, Src1F);
3196 return CondWhenTrue(getIcmp32Mapping(Condition));
3197 }
3198
3199 const int32_t Value = Srcs.getConstantValue();
3200 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
3201 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
3202 Variable *T = makeReg(IceType_i32);
3203 _lsls(T, Srcs.src0R(this), ShAmtOp);
3204 Context.insert(InstFakeUse::create(Func, T));
3205 return CondWhenTrue(getIcmp32Mapping(Condition));
3206 }
3207
3208 Variable *ConstR = makeReg(IceType_i32);
3209 _mov(ConstR,
3210 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
3211 Operand *NonConstF = OperandARM32FlexReg::create(
3212 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
3213 Ctx->getConstantInt32(ShAmt));
3214
3215 if (Srcs.swappedOperands()) {
3216 _cmp(ConstR, NonConstF);
3217 } else {
3218 Variable *T = makeReg(IceType_i32);
3219 _rsbs(T, ConstR, NonConstF);
3220 Context.insert(InstFakeUse::create(Func, T));
3221 }
3222 return CondWhenTrue(getIcmp32Mapping(Condition));
3223 }
3224
3225 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
3226 assert(Inst->getSrc(0)->getType() != IceType_i1);
3227 assert(Inst->getSrc(1)->getType() != IceType_i1);
3228
3229 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
3230 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
3231
3232 const InstIcmp::ICond Condition = Inst->getCondition();
2713 // a=icmp cond b, c ==> 3233 // a=icmp cond b, c ==>
2714 // GCC does: 3234 // GCC does:
2715 // <u/s>xtb tb, b 3235 // <u/s>xtb tb, b
2716 // <u/s>xtb tc, c 3236 // <u/s>xtb tc, c
2717 // cmp tb, tc 3237 // cmp tb, tc
2718 // mov.C1 t, #0 3238 // mov.C1 t, #0
2719 // mov.C2 t, #1 3239 // mov.C2 t, #1
2720 // mov a, t 3240 // mov a, t
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have 3241 // where the unsigned/sign extension is not needed for 32-bit. They also have
2722 // special cases for EQ and NE. E.g., for NE: 3242 // special cases for EQ and NE. E.g., for NE:
2723 // <extend to tb, tc> 3243 // <extend to tb, tc>
2724 // subs t, tb, tc 3244 // subs t, tb, tc
2725 // movne t, #1 3245 // movne t, #1
2726 // mov a, t 3246 // mov a, t
2727 // 3247 //
2728 // LLVM does: 3248 // LLVM does:
2729 // lsl tb, b, #<N> 3249 // lsl tb, b, #<N>
2730 // mov t, #0 3250 // mov t, #0
2731 // cmp tb, c, lsl #<N> 3251 // cmp tb, c, lsl #<N>
2732 // mov.<C> t, #1 3252 // mov.<C> t, #1
2733 // mov a, t 3253 // mov a, t
2734 // 3254 //
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on 3255 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 3256 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb 3257 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2738 // first. It's not clear to me why that special-casing is needed. 3258 // first. It's not clear to me why that special-casing is needed.
2739 // 3259 //
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few 3260 // We'll go with the LLVM way for now, since it's shorter and has just as few
2741 // dependencies. 3261 // dependencies.
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 3262 switch (Src0->getType()) {
2743 assert(ShiftAmt >= 0); 3263 default:
2744 Constant *ShiftConst = nullptr; 3264 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
2745 Variable *Src0R = nullptr; 3265 case IceType_i8:
2746 if (ShiftAmt) { 3266 case IceType_i16:
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 3267 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
2748 Src0R = makeReg(IceType_i32); 3268 case IceType_i32:
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 3269 return lowerInt32IcmpCond(Condition, Src0, Src1);
2750 } else { 3270 case IceType_i64:
2751 Src0R = legalizeToReg(Src0); 3271 return lowerInt64IcmpCond(Condition, Src0, Src1);
2752 } 3272 }
2753 if (ShiftAmt) {
2754 Variable *Src1R = legalizeToReg(Src1);
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
2757 _cmp(Src0R, Src1RShifted);
2758 } else {
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2760 _cmp(Src0R, Src1RF);
2761 }
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
2763 } 3273 }
2764 3274
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3275 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3276 Variable *Dest = Inst->getDest();
2767 3277
2768 if (isVectorType(Dest->getType())) { 3278 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3279 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3280 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3281 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3282 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4757 }
4248 return Reg; 4758 return Reg;
4249 } 4759 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4760 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4761 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4762 _movw(Reg, C);
4253 _movt(Reg, C); 4763 _movt(Reg, C);
4254 return Reg; 4764 return Reg;
4255 } else { 4765 } else {
4256 assert(isScalarFloatingType(Ty)); 4766 assert(isScalarFloatingType(Ty));
4767 uint32_t ModifiedImm;
4768 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4769 Variable *T = makeReg(Ty, RegNum);
4770 _mov(T,
4771 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4772 return T;
4773 }
4774
4775 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
4776 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
4777 // because ARM does not have a veor instruction with S registers.
4778 Variable *T = makeReg(IceType_f64, RegNum);
4779 Context.insert(InstFakeDef::create(Func, T));
4780 _veor(T, T, T);
4781 return T;
4782 }
4783
4257 // Load floats/doubles from literal pool. 4784 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4785 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4786 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4787 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4788 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4789 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4790 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4791 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4792 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4793 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4794 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5420 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5421 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5422 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5423 }
4903 5424
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5425 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5426 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5427 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5428
4908 } // end of namespace Ice 5429 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698