Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1296 return; 1296 return;
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); 1297 Variable *SrcLoReg = legalizeToReg(SrcLo);
1298 switch (Ty) { 1298 switch (Ty) {
1299 default: 1299 default:
1300 llvm_unreachable("Unexpected type"); 1300 llvm::report_fatal_error("Unexpected type");
1301 case IceType_i8: { 1301 case IceType_i8:
1302 Operand *Mask =
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
1304 _tst(SrcLoReg, Mask);
1305 break;
1306 }
1307 case IceType_i16: { 1302 case IceType_i16: {
1308 Operand *Mask = 1303 Operand *ShAmtF =
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
1310 _tst(SrcLoReg, Mask); 1305 Legal_Reg | Legal_Flex);
1311 break; 1306 Variable *T = makeReg(IceType_i32);
1312 } 1307 _lsls(T, SrcLoReg, ShAmtF);
1308 Context.insert(InstFakeUse::create(Func, T));
1309 } break;
1313 case IceType_i32: { 1310 case IceType_i32: {
1314 _tst(SrcLoReg, SrcLoReg); 1311 _tst(SrcLoReg, SrcLoReg);
1315 break; 1312 break;
1316 } 1313 }
1317 case IceType_i64: { 1314 case IceType_i64: {
1318 Variable *ScratchReg = makeReg(IceType_i32); 1315 Variable *T = makeReg(IceType_i32);
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
1320 // ScratchReg isn't going to be used, but we need the side-effect of 1317 // T isn't going to be used, but we need the side-effect of setting flags
1321 // setting flags from this operation. 1318 // from this operation.
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1319 Context.insert(InstFakeUse::create(Func, T));
1323 } 1320 }
1324 } 1321 }
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); 1322 InstARM32Label *Label = InstARM32Label::create(Func, this);
1326 _br(Label, CondARM32::NE); 1323 _br(Label, CondARM32::NE);
1327 _trap(); 1324 _trap();
1328 Context.insert(Label); 1325 Context.insert(Label);
1329 } 1326 }
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
1404 namespace {
1405 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
 1406 // It holds the operation's two sources, and maintains some state as to whether
Jim Stichnoth 2015/11/16 23:06:26 operations'
John 2015/11/17 22:17:05 well, this is the two sources of the operation n
1407 // one of them is a constant. If one of the operands is a constant, then it will
1408 // be stored as the operation's second source, with a bit indicating whether the
1409 // operands were swapped.
1410 //
 1411 // The class is split into a base class with operand type-independent methods, and
Jim Stichnoth 2015/11/16 23:06:26 split into ?
John 2015/11/17 22:17:05 Done.
1412 // a derived, templated class, for each type of operand we want to fold
1413 // constants for:
1414 //
1415 // NumericOperandsBase --> NumericOperands<ConstantFloat>
1416 // --> NumericOperands<ConstantDouble>
1417 // --> NumericOperands<ConstantInt32>
1418 //
 1419 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
Jim Stichnoth 2015/11/16 23:06:26 emitting
John 2015/11/17 22:17:05 Done.
1420 // inverted/negated immediates.
1421 class NumericOperandsBase {
1422 NumericOperandsBase() = delete;
1423 NumericOperandsBase(const NumericOperandsBase &) = delete;
1424 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
1425
1426 public:
1427 NumericOperandsBase(Operand *S0, Operand *S1)
1428 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
1429 Swapped(Src0 == S1 && S0 != S1) {
1430 assert(Src0 != nullptr);
1431 assert(Src1 != nullptr);
1432 assert(Src0 != Src1 || S0 == S1);
1433 }
1434
1435 bool hasConstOperand() const {
1436 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
1437 }
1438
1439 bool swappedOperands() const { return Swapped; }
1440
1441 Variable *src0R(TargetARM32 *Target) const {
1442 return legalizeToReg(Target, Src0);
1443 }
1444
1445 Variable *unswappedSrc0R(TargetARM32 *Target) const {
1446 return legalizeToReg(Target, Swapped ? Src1 : Src0);
1447 }
1448
1449 Operand *src1RF(TargetARM32 *Target) const {
1450 return legalizeToRegOrFlex(Target, Src1);
1451 }
1452
1453 Variable *unswappedSrc1R(TargetARM32 *Target) const {
1454 return legalizeToReg(Target, Swapped ? Src0 : Src1);
1455 }
1456
1457 Operand *unswappedSrc1RF(TargetARM32 *Target) const {
1458 return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1);
1459 }
1460
1461 protected:
1462 Operand *const Src0;
1463 Operand *const Src1;
1464 const bool Swapped;
1465
1466 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
1467 return Target->legalizeToReg(Src);
1468 }
1469
1470 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
1471 return Target->legalize(Src,
1472 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
1473 }
1474
1475 private:
1476 static Operand *NonConstOperand(Operand *S0, Operand *S1) {
1477 if (!llvm::isa<Constant>(S0))
1478 return S0;
1479 if (!llvm::isa<Constant>(S1))
1480 return S1;
1481 if (llvm::isa<ConstantRelocatable>(S1) &&
1482 !llvm::isa<ConstantRelocatable>(S0))
1483 return S1;
1484 return S0;
1485 }
1486
1487 static Operand *ConstOperand(Operand *S0, Operand *S1) {
1488 if (!llvm::isa<Constant>(S0))
1489 return S1;
1490 if (!llvm::isa<Constant>(S1))
1491 return S0;
1492 if (llvm::isa<ConstantRelocatable>(S1) &&
1493 !llvm::isa<ConstantRelocatable>(S0))
1494 return S0;
1495 return S1;
1496 }
1497 };
1498
// Typed refinement of NumericOperandsBase for one concrete constant class C
// (e.g. ConstantFloat, ConstantDouble, ConstantInteger32), adding access to
// the folded constant's primitive value.
template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    // Any constant folded into Src1 must be of the expected class C.
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  // Value of the folded constant; only meaningful when hasConstOperand().
  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};
1513
1514 using FloatOperands = NumericOperands<ConstantFloat>;
1515 using DoubleOperands = NumericOperands<ConstantDouble>;
1516
1517 class Int32Operands : public NumericOperands<ConstantInteger32> {
1518 Int32Operands() = delete;
1519 Int32Operands(const Int32Operands &) = delete;
1520 Int32Operands &operator=(const Int32Operands &) = delete;
1521
1522 public:
1523 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
1524
1525 bool immediateIsFlexEncodable() const {
1526 uint32_t Rotate, Imm8;
1527 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
1528 }
1529
1530 bool negatedImmediateIsFlexEncodable() const {
1531 uint32_t Rotate, Imm8;
1532 return OperandARM32FlexImm::canHoldImm(
1533 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
1534 }
1535
1536 Operand *negatedSrc1F(TargetARM32 *Target) const {
1537 return legalizeToRegOrFlex(Target,
1538 Target->getCtx()->getConstantInt32(
1539 -static_cast<int32_t>(getConstantValue())));
1540 }
1541
1542 bool invertedImmediateIsFlexEncodable() const {
1543 uint32_t Rotate, Imm8;
1544 return OperandARM32FlexImm::canHoldImm(
1545 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
1546 }
1547
1548 Operand *invertedSrc1F(TargetARM32 *Target) const {
1549 return legalizeToRegOrFlex(Target,
1550 Target->getCtx()->getConstantInt32(
1551 ~static_cast<uint32_t>(getConstantValue())));
1552 }
1553 };
1554 } // end of anonymous namespace
1555
1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1557 Variable *Dest, Operand *Src0,
1558 Operand *Src1) {
1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
1563
1564 // These helper-call-involved instructions are lowered in this separate
1565 // switch. This is because we would otherwise assume that we need to
1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1567 // helper calls, and such unused/redundant instructions will fail liveness
1568 // analysis under -Om1 setting.
1569 switch (Op) {
1570 default:
1571 break;
1572 case InstArithmetic::Udiv:
1573 case InstArithmetic::Sdiv:
1574 case InstArithmetic::Urem:
1575 case InstArithmetic::Srem: {
1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1578 // register, which will hide a constant source operand. Instead, check
1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) {
1582 _trap();
1583 return;
1584 }
1585 } else {
1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this);
1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this);
1588 div0Check(IceType_i64, Src1Lo, Src1Hi);
1589 }
1590 // Technically, ARM has its own aeabi routines, but we can use the
1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1592 // the more standard __moddi3 for rem.
1593 const char *HelperName = "";
1594 switch (Op) {
1595 default:
1596 llvm::report_fatal_error("Should have only matched div ops.");
1597 break;
1598 case InstArithmetic::Udiv:
1599 HelperName = H_udiv_i64;
1600 break;
1601 case InstArithmetic::Sdiv:
1602 HelperName = H_sdiv_i64;
1603 break;
1604 case InstArithmetic::Urem:
1605 HelperName = H_urem_i64;
1606 break;
1607 case InstArithmetic::Srem:
1608 HelperName = H_srem_i64;
1609 break;
1610 }
1611 constexpr SizeT MaxSrcs = 2;
1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1613 Call->addArg(Src0);
1614 Call->addArg(Src1);
1615 lowerCall(Call);
1616 return;
1617 }
1618 }
1619
1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1622 Variable *T_Lo = makeReg(DestLo->getType());
1623 Variable *T_Hi = makeReg(DestHi->getType());
1624
1625 switch (Op) {
1626 case InstArithmetic::_num:
1627 llvm::report_fatal_error("Unknown arithmetic operator");
1628 return;
1629 case InstArithmetic::Add:
1630 _adds(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
Jim Stichnoth 2015/11/16 23:06:26 Don't do this. src0R() and src1RF() have side eff
John 2015/11/17 22:17:05 doh... done.
1631 _mov(DestLo, T_Lo);
1632 _adc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1633 _mov(DestHi, T_Hi);
1634 return;
1635 case InstArithmetic::And:
1636 _and(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1637 _mov(DestLo, T_Lo);
1638 _and(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1639 _mov(DestHi, T_Hi);
1640 return;
1641 case InstArithmetic::Or:
1642 _orr(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1643 _mov(DestLo, T_Lo);
1644 _orr(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1645 _mov(DestHi, T_Hi);
1646 return;
1647 case InstArithmetic::Xor:
1648 _eor(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1649 _mov(DestLo, T_Lo);
1650 _eor(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1651 _mov(DestHi, T_Hi);
1652 return;
1653 case InstArithmetic::Sub:
1654 if (SrcsLo.swappedOperands()) {
1655 _rsbs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1656 _mov(DestLo, T_Lo);
1657 _rsc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1658 _mov(DestHi, T_Hi);
1659 } else {
1660 _subs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this));
1661 _mov(DestLo, T_Lo);
1662 _sbc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this));
1663 _mov(DestHi, T_Hi);
1664 }
1665 return;
1666 case InstArithmetic::Mul: {
1667 // GCC 4.8 does:
1668 // a=b*c ==>
1669 // t_acc =(mul) (b.lo * c.hi)
1670 // t_acc =(mla) (c.lo * b.hi) + t_acc
1671 // t.hi,t.lo =(umull) b.lo * c.lo
1672 // t.hi += t_acc
1673 // a.lo = t.lo
1674 // a.hi = t.hi
1675 //
1676 // LLVM does:
1677 // t.hi,t.lo =(umull) b.lo * c.lo
1678 // t.hi =(mla) (b.lo * c.hi) + t.hi
1679 // t.hi =(mla) (b.hi * c.lo) + t.hi
1680 // a.lo = t.lo
1681 // a.hi = t.hi
1682 //
1683 // LLVM's lowering has fewer instructions, but more register pressure:
1684 // t.lo is live from beginning to end, while GCC delays the two-dest
1685 // instruction till the end, and kills c.hi immediately.
1686 Variable *T_Acc = makeReg(IceType_i32);
1687 Variable *T_Acc1 = makeReg(IceType_i32);
1688 Variable *T_Hi1 = makeReg(IceType_i32);
1689 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1690 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1691 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1692 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
1693 _mul(T_Acc, Src0RLo, Src1RHi);
1694 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1695 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1696 _add(T_Hi, T_Hi1, T_Acc1);
1697 _mov(DestLo, T_Lo);
1698 _mov(DestHi, T_Hi);
1699 return;
1700 }
1701 case InstArithmetic::Shl: {
1702 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1703 Variable *Src0RLo = SrcsLo.src0R(this);
1704 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1705 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1706 if (ShAmtImm == 0) {
1707 _mov(DestLo, Src0RLo);
1708 _mov(DestHi, SrcsHi.src0R(this));
1709 return;
1710 }
1711
1712 if (ShAmtImm >= 32) {
1713 if (ShAmtImm == 32) {
1714 _mov(DestHi, Src0RLo);
1715 } else {
1716 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1717 Legal_Reg | Legal_Flex);
1718 _lsl(T_Hi, Src0RLo, ShAmtOp);
1719 _mov(DestHi, T_Hi);
1720 }
1721
1722 Operand *_0 =
1723 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1724 _mov(T_Lo, _0);
1725 _mov(DestLo, T_Lo);
1726 return;
1727 }
1728
1729 Variable *Src0RHi = SrcsHi.src0R(this);
1730 Operand *ShAmtOp =
1731 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1732 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1733 Legal_Reg | Legal_Flex);
1734 _lsl(T_Hi, Src0RHi, ShAmtOp);
1735 _orr(T_Hi, T_Hi,
1736 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1737 OperandARM32::LSR, ComplShAmtOp));
1738 _mov(DestHi, T_Hi);
1739
1740 _lsl(T_Lo, Src0RLo, ShAmtOp);
1741 _mov(DestLo, T_Lo);
1742 return;
1743 }
1744
1745 // a=b<<c ==>
1746 // pnacl-llc does:
1747 // mov t_b.lo, b.lo
1748 // mov t_b.hi, b.hi
1749 // mov t_c.lo, c.lo
1750 // rsb T0, t_c.lo, #32
1751 // lsr T1, t_b.lo, T0
1752 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1753 // sub T2, t_c.lo, #32
1754 // cmp T2, #0
1755 // lslge t_a.hi, t_b.lo, T2
1756 // lsl t_a.lo, t_b.lo, t_c.lo
1757 // mov a.lo, t_a.lo
1758 // mov a.hi, t_a.hi
1759 //
1760 // GCC 4.8 does:
1761 // sub t_c1, c.lo, #32
1762 // lsl t_hi, b.hi, c.lo
1763 // orr t_hi, t_hi, b.lo, lsl t_c1
1764 // rsb t_c2, c.lo, #32
1765 // orr t_hi, t_hi, b.lo, lsr t_c2
1766 // lsl t_lo, b.lo, c.lo
1767 // a.lo = t_lo
1768 // a.hi = t_hi
1769 //
1770 // These are incompatible, therefore we mimic pnacl-llc.
1771 // Can be strength-reduced for constant-shifts, but we don't do that for
1772 // now.
1773 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1774 // ARM, shifts only take the lower 8 bits of the shift register, and
1775 // saturate to the range 0-32, so the negative value will saturate to 32.
1776 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
Jim Stichnoth 2015/11/16 23:06:26 Maybe this should be named _32RF?
John 2015/11/17 22:17:05 I'd rather not. This is the number 32, not somethi
1777 Operand *_0 =
1778 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1779 Variable *T0 = makeReg(IceType_i32);
1780 Variable *T1 = makeReg(IceType_i32);
1781 Variable *T2 = makeReg(IceType_i32);
1782 Variable *TA_Hi = makeReg(IceType_i32);
1783 Variable *TA_Lo = makeReg(IceType_i32);
1784 Variable *Src0RLo = SrcsLo.src0R(this);
1785 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1786 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1787 _rsb(T0, Src1RLo, _32);
1788 _lsr(T1, Src0RLo, T0);
1789 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1790 OperandARM32::LSL, Src1RLo));
1791 _sub(T2, Src1RLo, _32);
1792 _cmp(T2, _0);
1793 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1794 _set_dest_redefined();
1795 _lsl(TA_Lo, Src0RLo, Src1RLo);
1796 _mov(DestLo, TA_Lo);
1797 _mov(DestHi, TA_Hi);
1798 return;
1799 }
1800 case InstArithmetic::Lshr:
1801 case InstArithmetic::Ashr: {
1802 const bool ASR = Op == InstArithmetic::Ashr;
1803 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
1804 Variable *Src0RHi = SrcsHi.src0R(this);
1805 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1806 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
1807 if (ShAmtImm == 0) {
1808 _mov(DestHi, Src0RHi);
1809 _mov(DestLo, SrcsLo.src0R(this));
1810 return;
1811 }
1812
1813 if (ShAmtImm >= 32) {
1814 if (ShAmtImm == 32) {
1815 _mov(DestLo, Src0RHi);
1816 } else {
1817 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1818 Legal_Reg | Legal_Flex);
1819 if (ASR) {
1820 _asr(T_Lo, Src0RHi, ShAmtOp);
1821 } else {
1822 _lsr(T_Lo, Src0RHi, ShAmtOp);
1823 }
1824 _mov(DestLo, T_Lo);
1825 }
1826
1827 if (ASR) {
1828 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1829 Legal_Reg | Legal_Flex);
1830 _asr(T_Hi, Src0RHi, _31);
1831 } else {
1832 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1833 Legal_Reg | Legal_Flex);
1834 _mov(T_Hi, _0);
1835 }
1836 _mov(DestHi, T_Hi);
1837 return;
1838 }
1839
1840 Variable *Src0RLo = SrcsLo.src0R(this);
1841 Operand *ShAmtOp =
1842 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1843 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1844 Legal_Reg | Legal_Flex);
1845 _lsr(T_Lo, Src0RLo, ShAmtOp);
1846 _orr(T_Lo, T_Lo,
1847 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1848 OperandARM32::LSL, ComplShAmtOp));
1849 _mov(DestLo, T_Lo);
1850
1851 if (ASR) {
1852 _asr(T_Hi, Src0RHi, ShAmtOp);
1853 } else {
1854 _lsr(T_Hi, Src0RHi, ShAmtOp);
1855 }
1856 _mov(DestHi, T_Hi);
1857 return;
1858 }
1859
1860 // a=b>>c
1861 // pnacl-llc does:
1862 // mov t_b.lo, b.lo
1863 // mov t_b.hi, b.hi
1864 // mov t_c.lo, c.lo
1865 // lsr T0, t_b.lo, t_c.lo
1866 // rsb T1, t_c.lo, #32
1867 // orr t_a.lo, T0, t_b.hi, lsl T1
1868 // sub T2, t_c.lo, #32
1869 // cmp T2, #0
1870 // [al]srge t_a.lo, t_b.hi, T2
1871 // [al]sr t_a.hi, t_b.hi, t_c.lo
1872 // mov a.lo, t_a.lo
1873 // mov a.hi, t_a.hi
1874 //
1875 // GCC 4.8 does (lsr):
1876 // rsb t_c1, c.lo, #32
1877 // lsr t_lo, b.lo, c.lo
1878 // orr t_lo, t_lo, b.hi, lsl t_c1
1879 // sub t_c2, c.lo, #32
1880 // orr t_lo, t_lo, b.hi, lsr t_c2
1881 // lsr t_hi, b.hi, c.lo
1882 // mov a.lo, t_lo
1883 // mov a.hi, t_hi
1884 //
1885 // These are incompatible, therefore we mimic pnacl-llc.
1886 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
1887 Operand *_0 =
1888 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1889 Variable *T0 = makeReg(IceType_i32);
1890 Variable *T1 = makeReg(IceType_i32);
1891 Variable *T2 = makeReg(IceType_i32);
1892 Variable *TA_Lo = makeReg(IceType_i32);
1893 Variable *TA_Hi = makeReg(IceType_i32);
1894 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
1895 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
1896 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
1897 _lsr(T0, Src0RLo, Src1RLo);
1898 _rsb(T1, Src1RLo, _32);
1899 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1900 OperandARM32::LSL, T1));
1901 _sub(T2, Src1RLo, _32);
1902 _cmp(T2, _0);
1903 if (ASR) {
1904 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1905 _set_dest_redefined();
1906 _asr(TA_Hi, Src0RHi, Src1RLo);
1907 } else {
1908 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1909 _set_dest_redefined();
1910 _lsr(TA_Hi, Src0RHi, Src1RLo);
1911 }
1912 _mov(DestLo, TA_Lo);
1913 _mov(DestHi, TA_Hi);
1914 return;
1915 }
1916 case InstArithmetic::Fadd:
1917 case InstArithmetic::Fsub:
1918 case InstArithmetic::Fmul:
1919 case InstArithmetic::Fdiv:
1920 case InstArithmetic::Frem:
1921 llvm::report_fatal_error("FP instruction with i64 type");
1922 return;
1923 case InstArithmetic::Udiv:
1924 case InstArithmetic::Sdiv:
1925 case InstArithmetic::Urem:
1926 case InstArithmetic::Srem:
1927 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1928 "should have already been handled before");
1929 return;
1930 }
1931 }
1932
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1933 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1934 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1935 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1936 lowerInt1Arithmetic(Inst);
1411 return; 1937 return;
1412 } 1938 }
1413 1939
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1940 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1941 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1942 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1943 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to 1944 return;
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with 1945 }
1427 // helper calls, and such unused/redundant instructions will fail liveness 1946
1428 // analysis under -Om1 setting. 1947 if (isVectorType(Dest->getType())) {
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return;
1689 } else if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1948 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1949 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1950 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1951 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1952 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1953 return;
1696 } 1954 }
1955
1697 // Dest->getType() is a non-i64 scalar. 1956 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1957 Variable *T = makeReg(Dest->getType());
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1958
1959 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1960 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1961 // difficult to determine (constant may be moved to a register).
1962 // * Handle floating point arithmetic separately: they require Src1 to be
1963 // legalized to a register.
1703 switch (Inst->getOp()) { 1964 switch (Inst->getOp()) {
1704 default: 1965 default:
1705 break; 1966 break;
1706 case InstArithmetic::Udiv: { 1967 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1968 constexpr bool NotRemainder = false;
1969 Variable *Src0R = legalizeToReg(Src0);
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1970 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1971 H_udiv_i32, NotRemainder);
1710 return; 1972 return;
1711 } 1973 }
1712 case InstArithmetic::Sdiv: { 1974 case InstArithmetic::Sdiv: {
1713 constexpr bool NotRemainder = false; 1975 constexpr bool NotRemainder = false;
1976 Variable *Src0R = legalizeToReg(Src0);
1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 1977 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1715 H_sdiv_i32, NotRemainder); 1978 H_sdiv_i32, NotRemainder);
1716 return; 1979 return;
1717 } 1980 }
1718 case InstArithmetic::Urem: { 1981 case InstArithmetic::Urem: {
1719 constexpr bool IsRemainder = true; 1982 constexpr bool IsRemainder = true;
1983 Variable *Src0R = legalizeToReg(Src0);
1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1984 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1721 H_urem_i32, IsRemainder); 1985 H_urem_i32, IsRemainder);
1722 return; 1986 return;
1723 } 1987 }
1724 case InstArithmetic::Srem: { 1988 case InstArithmetic::Srem: {
1725 constexpr bool IsRemainder = true; 1989 constexpr bool IsRemainder = true;
1990 Variable *Src0R = legalizeToReg(Src0);
1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, 1991 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
1727 H_srem_i32, IsRemainder); 1992 H_srem_i32, IsRemainder);
1728 return; 1993 return;
1729 } 1994 }
1730 case InstArithmetic::Frem: { 1995 case InstArithmetic::Frem: {
1731 const SizeT MaxSrcs = 2; 1996 constexpr SizeT MaxSrcs = 2;
1997 Variable *Src0R = legalizeToReg(Src0);
1732 Type Ty = Dest->getType(); 1998 Type Ty = Dest->getType();
1733 InstCall *Call = makeHelperCall( 1999 InstCall *Call = makeHelperCall(
1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); 2000 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1735 Call->addArg(Src0R); 2001 Call->addArg(Src0R);
1736 Call->addArg(Src1); 2002 Call->addArg(Src1);
1737 lowerCall(Call); 2003 lowerCall(Call);
1738 return; 2004 return;
1739 } 2005 }
1740 }
1741
1742 // Handle floating point arithmetic separately: they require Src1 to be
1743 // legalized to a register.
1744 switch (Inst->getOp()) {
1745 default:
1746 break;
1747 case InstArithmetic::Fadd: { 2006 case InstArithmetic::Fadd: {
2007 Variable *Src0R = legalizeToReg(Src0);
1748 Variable *Src1R = legalizeToReg(Src1); 2008 Variable *Src1R = legalizeToReg(Src1);
1749 _vadd(T, Src0R, Src1R); 2009 _vadd(T, Src0R, Src1R);
1750 _mov(Dest, T); 2010 _mov(Dest, T);
1751 return; 2011 return;
1752 } 2012 }
1753 case InstArithmetic::Fsub: { 2013 case InstArithmetic::Fsub: {
2014 Variable *Src0R = legalizeToReg(Src0);
1754 Variable *Src1R = legalizeToReg(Src1); 2015 Variable *Src1R = legalizeToReg(Src1);
1755 _vsub(T, Src0R, Src1R); 2016 _vsub(T, Src0R, Src1R);
1756 _mov(Dest, T); 2017 _mov(Dest, T);
1757 return; 2018 return;
1758 } 2019 }
1759 case InstArithmetic::Fmul: { 2020 case InstArithmetic::Fmul: {
2021 Variable *Src0R = legalizeToReg(Src0);
1760 Variable *Src1R = legalizeToReg(Src1); 2022 Variable *Src1R = legalizeToReg(Src1);
1761 _vmul(T, Src0R, Src1R); 2023 _vmul(T, Src0R, Src1R);
1762 _mov(Dest, T); 2024 _mov(Dest, T);
1763 return; 2025 return;
1764 } 2026 }
1765 case InstArithmetic::Fdiv: { 2027 case InstArithmetic::Fdiv: {
2028 Variable *Src0R = legalizeToReg(Src0);
1766 Variable *Src1R = legalizeToReg(Src1); 2029 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 2030 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 2031 _mov(Dest, T);
1769 return; 2032 return;
1770 } 2033 }
1771 } 2034 }
1772 2035
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 2036 // Handle everything else here.
2037 Int32Operands Srcs(Src0, Src1);
1774 switch (Inst->getOp()) { 2038 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 2039 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 2040 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 2041 return;
1778 case InstArithmetic::Add: 2042 case InstArithmetic::Add:
1779 _add(T, Src0R, Src1RF); 2043 if (Srcs.hasConstOperand()) {
2044 if (!Srcs.immediateIsFlexEncodable() &&
2045 Srcs.negatedImmediateIsFlexEncodable()) {
2046 Variable *Src0R = Srcs.src0R(this);
2047 Operand *Src1F = Srcs.negatedSrc1F(this);
2048 if (!Srcs.swappedOperands()) {
2049 _sub(T, Src0R, Src1F);
2050 } else {
2051 _rsb(T, Src0R, Src1F);
2052 }
2053 _mov(Dest, T);
2054 return;
2055 }
2056 }
2057 _add(T, Srcs.src0R(this), Srcs.src1RF(this));
1780 _mov(Dest, T); 2058 _mov(Dest, T);
1781 return; 2059 return;
1782 case InstArithmetic::And: 2060 case InstArithmetic::And:
1783 _and(T, Src0R, Src1RF); 2061 if (Srcs.hasConstOperand()) {
2062 if (!Srcs.immediateIsFlexEncodable() &&
2063 Srcs.invertedImmediateIsFlexEncodable()) {
2064 Variable *Src0R = Srcs.src0R(this);
2065 Operand *Src1F = Srcs.invertedSrc1F(this);
2066 _bic(T, Src0R, Src1F);
2067 _mov(Dest, T);
2068 return;
2069 }
2070 }
2071 _and(T, Srcs.src0R(this), Srcs.src1RF(this));
1784 _mov(Dest, T); 2072 _mov(Dest, T);
1785 return; 2073 return;
1786 case InstArithmetic::Or: 2074 case InstArithmetic::Or:
1787 _orr(T, Src0R, Src1RF); 2075 _orr(T, Srcs.src0R(this), Srcs.src1RF(this));
1788 _mov(Dest, T); 2076 _mov(Dest, T);
1789 return; 2077 return;
1790 case InstArithmetic::Xor: 2078 case InstArithmetic::Xor:
1791 _eor(T, Src0R, Src1RF); 2079 _eor(T, Srcs.src0R(this), Srcs.src1RF(this));
1792 _mov(Dest, T); 2080 _mov(Dest, T);
1793 return; 2081 return;
1794 case InstArithmetic::Sub: 2082 case InstArithmetic::Sub:
1795 _sub(T, Src0R, Src1RF); 2083 if (Srcs.hasConstOperand()) {
2084 if (Srcs.immediateIsFlexEncodable()) {
2085 if (Srcs.swappedOperands()) {
2086 _rsb(T, Srcs.src0R(this), Srcs.src1RF(this));
2087 } else {
2088 _sub(T, Srcs.src0R(this), Srcs.src1RF(this));
2089 }
2090 _mov(Dest, T);
2091 return;
2092 }
2093 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2094 _add(T, Srcs.src0R(this), Srcs.negatedSrc1F(this));
2095 _mov(Dest, T);
2096 return;
2097 }
2098 }
2099 _sub(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this));
1796 _mov(Dest, T); 2100 _mov(Dest, T);
1797 return; 2101 return;
1798 case InstArithmetic::Mul: { 2102 case InstArithmetic::Mul: {
1799 Variable *Src1R = legalizeToReg(Src1RF); 2103 _mul(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this));
1800 _mul(T, Src0R, Src1R);
1801 _mov(Dest, T); 2104 _mov(Dest, T);
1802 return; 2105 return;
1803 } 2106 }
1804 case InstArithmetic::Shl: 2107 case InstArithmetic::Shl: {
1805 _lsl(T, Src0R, Src1RF); 2108 _lsl(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1RF(this));
1806 _mov(Dest, T); 2109 _mov(Dest, T);
1807 return; 2110 return;
1808 case InstArithmetic::Lshr: 2111 }
2112 case InstArithmetic::Lshr: {
2113 Variable *Src0R = Srcs.unswappedSrc0R(this);
1809 if (Dest->getType() != IceType_i32) { 2114 if (Dest->getType() != IceType_i32) {
1810 _uxt(Src0R, Src0R); 2115 _uxt(Src0R, Src0R);
1811 } 2116 }
1812 _lsr(T, Src0R, Src1RF); 2117 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this));
1813 _mov(Dest, T); 2118 _mov(Dest, T);
1814 return; 2119 return;
1815 case InstArithmetic::Ashr: 2120 }
2121 case InstArithmetic::Ashr: {
2122 Variable *Src0R = Srcs.unswappedSrc0R(this);
1816 if (Dest->getType() != IceType_i32) { 2123 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 2124 _sxt(Src0R, Src0R);
1818 } 2125 }
1819 _asr(T, Src0R, Src1RF); 2126 _asr(T, Src0R, Srcs.unswappedSrc1RF(this));
1820 _mov(Dest, T); 2127 _mov(Dest, T);
1821 return; 2128 return;
2129 }
1822 case InstArithmetic::Udiv: 2130 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 2131 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 2132 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 2133 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 2134 llvm::report_fatal_error(
2135 "Integer div/rem should have been handled earlier.");
1827 return; 2136 return;
1828 case InstArithmetic::Fadd: 2137 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 2138 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 2139 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 2140 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 2141 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 2142 llvm::report_fatal_error(
2143 "Floating point arith should have been handled earlier.");
1834 return; 2144 return;
1835 } 2145 }
1836 } 2146 }
1837 2147
/// Lowers an assignment (Dest = Src0) for ARM32.
///
/// i64 assignments are split into two independent 32-bit moves (low half,
/// then high half), each staged through a fresh temporary register. All
/// other types are lowered as a single move after legalizing the source.
void TargetARM32::lowerAssign(const InstAssign *Inst) {
  Variable *Dest = Inst->getDest();
  Operand *Src0 = Inst->getSrc(0);
  assert(Dest->getType() == Src0->getType());
  if (Dest->getType() == IceType_i64) {
    // Replace undef with a legal operand before splitting into halves.
    Src0 = legalizeUndef(Src0);

    // Low 32 bits: stage through T_Lo so the source may be a flex operand.
    Variable *T_Lo = makeReg(IceType_i32);
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
    _mov(T_Lo, Src0Lo);
    _mov(DestLo, T_Lo);

    // High 32 bits: same pattern, independent of the low half.
    Variable *T_Hi = makeReg(IceType_i32);
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
    _mov(T_Hi, Src0Hi);
    _mov(DestHi, T_Hi);

    return;
  }

  Operand *NewSrc;
  if (Dest->hasReg()) {
    // If Dest already has a physical register, then legalize the Src operand
    // into a Variable with the same register assignment. This especially
    // helps allow the use of Flex operands.
    NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
  } else {
    // Dest could be a stack operand. Since we could potentially need to do a
    // Store (and store can only have Register operands), legalize this to a
    // register.
    NewSrc = legalize(Src0, Legal_Reg);
  }

  if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
    // Vector and scalar-FP moves accept register or memory sources only;
    // re-legalize to rule out flex operands.
    NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
  }
  _mov(Dest, NewSrc);
}
1879 2188
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2189 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2190 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2191 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2192 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2193 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2194
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2195 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2196
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Maps each InstFcmp condition to (up to) two ARM condition codes. A lowered
// fcmp is considered true when CC0 holds or when CC1 holds; CC1 may be kNone
// when a single condition code suffices. The rows are generated from
// FCMPARM32_TABLE so they stay in sync with the fcmp condition enumeration.
struct {
  CondARM32::Cond CC0;
  CondARM32::Cond CC1;
} TableFcmp[] = {
#define X(val, CC0, CC1)                                                       \
  { CondARM32::CC0, CondARM32::CC1 }                                           \
  ,
    FCMPARM32_TABLE
#undef X
};
2892
2893 bool isFloatingPointZero(Operand *Src) {
2894 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2895 return Utils::isPositiveZero(F32->getValue());
2896 }
2897
2898 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2899 return Utils::isPositiveZero(F64->getValue());
2900 }
2901
2902 return false;
2903 }
2583 } // end of anonymous namespace 2904 } // end of anonymous namespace
2584 2905
/// Emits the floating point compare for Instr (vcmp followed by vmrs to copy
/// the FP status flags into the core flags) and returns the condition
/// code(s) under which the comparison is true.
///
/// InstFcmp::False/True require no compare at all and map to kNone/AL. For
/// all other conditions, when the second source is the constant +0.0 the
/// immediate-zero form of vcmp is used instead of materializing the constant
/// in a register; otherwise both sources are legalized to registers.
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
  InstFcmp::FCond Condition = Instr->getCondition();
  switch (Condition) {
  case InstFcmp::False:
    // Never true: no compare needed.
    return CondWhenTrue(CondARM32::kNone);
  case InstFcmp::True:
    // Always true: no compare needed.
    return CondWhenTrue(CondARM32::AL);
    break;
  default: {
    Variable *Src0R = legalizeToReg(Instr->getSrc(0));
    Operand *Src1 = Instr->getSrc(1);
    if (isFloatingPointZero(Src1)) {
      // vcmp against the #0.0 flex operand -- avoids loading the constant.
      _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
    } else {
      _vcmp(Src0R, legalizeToReg(Src1));
    }
    _vmrs();
    assert(Condition < llvm::array_lengthof(TableFcmp));
    return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
  }
  }
}
2603 2928
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2929 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2930 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2931 if (isVectorType(Dest->getType())) {
(...skipping 28 matching lines...) Expand all
2635 _mov(T, _1, Cond.WhenTrue0); 2960 _mov(T, _1, Cond.WhenTrue0);
2636 } 2961 }
2637 2962
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 2963 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 2964 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 2965 }
2641 2966
2642 _mov(Dest, T); 2967 _mov(Dest, T);
2643 } 2968 }
2644 2969
/// Emits the flag-setting instruction sequence for a 64-bit icmp and returns
/// the condition code under which the comparison is true.
///
/// When one operand is constant, special cases are used: (x == 0 / x != 0)
/// becomes a single orrs of the two halves; otherwise a cmp/sbcs (signed) or
/// cmp/cmp.eq (unsigned) sequence is emitted, using rsbs/rscs variants when
/// the constant sits on the left of the comparison (operands swapped). The
/// non-constant path legalizes all four halves to registers and emits the
/// pnacl-llc-style sequence described in the comment below.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < llvm::array_lengthof(TableIcmp64));

  Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
  Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
  // Both halves must agree on whether a constant is present, and on which
  // side it appears.
  assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
  assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());

  if (SrcsLo.hasConstOperand()) {
    const uint32_t ValueLo = SrcsLo.getConstantValue();
    const uint32_t ValueHi = SrcsHi.getConstantValue();
    const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
    if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
        Value == 0) {
      // x == 0 / x != 0: orrs of the two halves sets Z in a single
      // flag-setting instruction.
      Variable *T = makeReg(IceType_i32);
      _orrs(T, SrcsLo.src0R(this), SrcsHi.src0R(this));
      Context.insert(InstFakeUse::create(Func, T));
      return CondWhenTrue(TableIcmp64[Index].C1);
    }

    Variable *Src0RLo = SrcsLo.src0R(this);
    Variable *Src0RHi = SrcsHi.src0R(this);
    Operand *Src1RFLo = SrcsLo.src1RF(this);
    // Reuse the low-half operand when both halves hold the same immediate.
    Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);

    // Use reverse-subtract forms when the table-mandated operand order
    // disagrees with where the constant actually sits.
    const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands();

    if (UseRsb) {
      if (TableIcmp64[Index].IsSigned) {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo);
        Context.insert(InstFakeUse::create(Func, T));

        T = makeReg(IceType_i32);
        _rscs(T, Src0RHi, Src1RFHi);
        // We need to add a FakeUse here because liveness gets mad at us (Def
        // without Use.) Note that flag-setting instructions are considered to
        // have side effects and, therefore, are not DCE'ed.
        Context.insert(InstFakeUse::create(Func, T));
      } else {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RHi, Src1RFHi);
        Context.insert(InstFakeUse::create(Func, T));

        T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
        Context.insert(InstFakeUse::create(Func, T));
      }
    } else {
      if (TableIcmp64[Index].IsSigned) {
        _cmp(Src0RLo, Src1RFLo);
        Variable *T = makeReg(IceType_i32);
        _sbcs(T, Src0RHi, Src1RFHi);
        Context.insert(InstFakeUse::create(Func, T));
      } else {
        _cmp(Src0RHi, Src1RFHi);
        _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
      }
    }

    return CondWhenTrue(TableIcmp64[Index].C1);
  }

  // Non-constant path: legalize all four halves to registers, honoring the
  // operand swap required by the condition table.
  Variable *Src0RLo, *Src0RHi;
  Operand *Src1RFLo, *Src1RFHi;
  if (TableIcmp64[Index].Swapped) {
    Src0RLo = legalizeToReg(loOperand(Src1));
    Src0RHi = legalizeToReg(hiOperand(Src1));
    Src1RFLo = legalizeToReg(loOperand(Src0));
    Src1RFHi = legalizeToReg(hiOperand(Src0));
  } else {
    Src0RLo = legalizeToReg(loOperand(Src0));
    Src0RHi = legalizeToReg(hiOperand(Src0));
    Src1RFLo = legalizeToReg(loOperand(Src1));
    Src1RFHi = legalizeToReg(hiOperand(Src1));
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp b.hi, c.hi     or  cmp b.lo, c.lo
  //   cmp.eq b.lo, c.lo      sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1         mov.<C1> t, #1
  //   mov.<C2> t, #0         mov.<C2> t, #0
  //   mov a, t               mov a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped
  // as well.
  //
  // LLVM does a similar thing for eq/ne, and for signed/unsigned <, <=, etc.
  // uses a longer sequence with two unconditional mov #0, two cmps, two
  // conditional mov #1, and one conditional reg mov. That has fewer
  // dependencies for better ILP, at the cost of more registers and a longer
  // sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  if (TableIcmp64[Index].IsSigned) {
    Variable *ScratchReg = makeReg(IceType_i32);
    _cmp(Src0RLo, Src1RFLo);
    _sbcs(ScratchReg, Src0RHi, Src1RFHi);
    // ScratchReg isn't going to be used, but we need the side-effect of
    // setting flags from this operation.
    Context.insert(InstFakeUse::create(Func, ScratchReg));
  } else {
    _cmp(Src0RHi, Src1RFHi);
    _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
  }
  return CondWhenTrue(TableIcmp64[Index].C1);
}
2681 3092
/// Emits the flag-setting compare for a 32-bit icmp and returns the
/// condition code under which the comparison is true.
///
/// Constant-operand special cases:
///  * x == 0 / x != 0 -> tst x, x (no immediate needed);
///  * an immediate that is not flex-encodable but whose negation is
///    -> cmn (compare-negative) with the negated immediate;
///  * a constant on the left of the comparison -> rsbs, since cmp requires
///    a register first operand.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  if (!Srcs.hasConstOperand()) {
    // No constant involved: plain register/flex cmp.
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _cmp(Src0R, Src1RF);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Variable *Src0R = Srcs.src0R(this);
  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    // Compare against zero without materializing the immediate.
    _tst(Src0R, Src0R);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
      Srcs.negatedImmediateIsFlexEncodable()) {
    // cmp can't encode the immediate, but cmn can encode its negation.
    Operand *Src1F = Srcs.negatedSrc1F(this);
    _cmn(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Operand *Src1RF = Srcs.src1RF(this);
  if (!Srcs.swappedOperands()) {
    _cmp(Src0R, Src1RF);
  } else {
    // The constant is the first comparison operand: use a flag-setting
    // reverse subtract, discarding the result via a FakeUse.
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, Src0R, Src1RF);
    Context.insert(InstFakeUse::create(Func, T));
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}
3129
/// Emits the flag-setting compare for an i8/i16 icmp and returns the
/// condition code under which the comparison is true.
///
/// Instead of sign/zero-extending both operands, both are shifted left so
/// that the significant bits occupy the top of the 32-bit register (ShAmt is
/// 24 for i8, 16 for i16); the comparison then behaves identically to a
/// 32-bit compare. With a constant operand, x == 0 / x != 0 is handled with
/// a single lsls, and otherwise the pre-shifted constant is materialized in
/// a register and compared against the shifted non-constant operand.
TargetARM32::CondWhenTrue
TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                       Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShAmt >= 0);

  if (!Srcs.hasConstOperand()) {
    // lsl Src0 into a temp, then fold Src1's shift into the cmp as a
    // register-shifted flex operand.
    Variable *Src0R = makeReg(IceType_i32);
    Operand *ShAmtF =
        legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
    _lsl(Src0R, legalizeToReg(Src0), ShAmtF);

    Variable *Src1R = legalizeToReg(Src1);
    OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
        Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
    _cmp(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    // x == 0 / x != 0: a flag-setting shift tests the significant bits.
    Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
    Variable *T = makeReg(IceType_i32);
    _lsls(T, Srcs.src0R(this), ShAmtOp);
    Context.insert(InstFakeUse::create(Func, T));
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  // Materialize the pre-shifted constant; compare it against the other
  // operand shifted via a flex operand.
  Variable *ConstR = makeReg(IceType_i32);
  _mov(ConstR,
       legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
  Operand *NonConstF = OperandARM32FlexReg::create(
      Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
      Ctx->getConstantInt32(ShAmt));

  if (Srcs.swappedOperands()) {
    _cmp(ConstR, NonConstF);
  } else {
    // cmp's first operand must be a register, so when the constant is on the
    // right we use a flag-setting reverse subtract instead.
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, ConstR, NonConstF);
    Context.insert(InstFakeUse::create(Func, T));
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}
3175
/// Dispatches an icmp to the width-specific lowering helper and returns the
/// condition code(s) under which the comparison is true. The compare
/// instructions themselves are emitted by the helpers as a side effect.
TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
  // i1 compares are expected to have been handled elsewhere.
  assert(Inst->getSrc(0)->getType() != IceType_i1);
  assert(Inst->getSrc(1)->getType() != IceType_i1);

  Operand *Src0 = legalizeUndef(Inst->getSrc(0));
  Operand *Src1 = legalizeUndef(Inst->getSrc(1));

  const InstIcmp::ICond Condition = Inst->getCondition();
  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp tb, tc
  //   mov.C1 t, #0
  //   mov.C2 t, #1
  //   mov a, t
  // where the unsigned/sign extension is not needed for 32-bit. They also
  // have special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs t, tb, tc
  //   movne t, #1
  //   mov a, t
  //
  // LLVM does:
  //   lsl tb, b, #<N>
  //   mov t, #0
  //   cmp tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus
  // on the digits that actually matter (for 16-bit or 8-bit
  // signed/unsigned). For the unsigned case, for some reason it does similar
  // to GCC and does a uxtb first. It's not clear to me why that
  // special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as
  // few dependencies.
  switch (Src0->getType()) {
  default:
    llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
  case IceType_i8:
  case IceType_i16:
    return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
  case IceType_i32:
    return lowerInt32IcmpCond(Condition, Src0, Src1);
  case IceType_i64:
    return lowerInt64IcmpCond(Condition, Src0, Src1);
  }
}
2764 3225
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3226 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3227 Variable *Dest = Inst->getDest();
2767 3228
2768 if (isVectorType(Dest->getType())) { 3229 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3230 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3231 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3232 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3233 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4708 }
4248 return Reg; 4709 return Reg;
4249 } 4710 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4711 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4712 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4713 _movw(Reg, C);
4253 _movt(Reg, C); 4714 _movt(Reg, C);
4254 return Reg; 4715 return Reg;
4255 } else { 4716 } else {
4256 assert(isScalarFloatingType(Ty)); 4717 assert(isScalarFloatingType(Ty));
4718 uint32_t ModifiedImm;
4719 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4720 Variable *T = makeReg(Ty, RegNum);
4721 _mov(T,
4722 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4723 return T;
4724 }
4725
4726 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
4727 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
4728 // because ARM does not have a veor instruction with S registers.
4729 Variable *T = makeReg(IceType_f64, RegNum);
4730 Context.insert(InstFakeDef::create(Func, T));
4731 _veor(T, T, T);
4732 return T;
4733 }
4734
4257 // Load floats/doubles from literal pool. 4735 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4736 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4737 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4738 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4739 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4740 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4741 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4742 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4743 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4744 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4745 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5371 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5372 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5373 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5374 }
4903 5375
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5376 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5377 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5378 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5379
4908 } // end of namespace Ice 5380 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | src/IceTargetLoweringX86BaseImpl.h » ('j') | src/IceUtils.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698