OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1290 } | 1290 } |
1291 _mov(Dest, SP); | 1291 _mov(Dest, SP); |
1292 } | 1292 } |
1293 | 1293 |
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { | 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) | 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) |
1296 return; | 1296 return; |
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); | 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); |
1298 switch (Ty) { | 1298 switch (Ty) { |
1299 default: | 1299 default: |
1300 llvm_unreachable("Unexpected type"); | 1300 llvm::report_fatal_error("Unexpected type"); |
1301 case IceType_i8: { | 1301 case IceType_i8: |
1302 Operand *Mask = | |
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); | |
1304 _tst(SrcLoReg, Mask); | |
1305 break; | |
1306 } | |
1307 case IceType_i16: { | 1302 case IceType_i16: { |
1308 Operand *Mask = | 1303 Operand *ShAmtF = |
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); | 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
1310 _tst(SrcLoReg, Mask); | 1305 Legal_Reg | Legal_Flex); |
1311 break; | 1306 Variable *T = makeReg(IceType_i32); |
1312 } | 1307 _lsls(T, SrcLoReg, ShAmtF); |
| 1308 Context.insert(InstFakeUse::create(Func, T)); |
| 1309 } break; |
1313 case IceType_i32: { | 1310 case IceType_i32: { |
1314 _tst(SrcLoReg, SrcLoReg); | 1311 _tst(SrcLoReg, SrcLoReg); |
1315 break; | 1312 break; |
1316 } | 1313 } |
1317 case IceType_i64: { | 1314 case IceType_i64: { |
1318 Variable *ScratchReg = makeReg(IceType_i32); | 1315 Variable *T = makeReg(IceType_i32); |
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
1320 // ScratchReg isn't going to be used, but we need the side-effect of | 1317 // T isn't going to be used, but we need the side-effect of setting flags |
1321 // setting flags from this operation. | 1318 // from this operation. |
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1319 Context.insert(InstFakeUse::create(Func, T)); |
1323 } | 1320 } |
1324 } | 1321 } |
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1322 InstARM32Label *Label = InstARM32Label::create(Func, this); |
1326 _br(Label, CondARM32::NE); | 1323 _br(Label, CondARM32::NE); |
1327 _trap(); | 1324 _trap(); |
1328 Context.insert(Label); | 1325 Context.insert(Label); |
1329 } | 1326 } |
1330 | 1327 |
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
1332 Operand *Src1, ExtInstr ExtFunc, | 1329 Operand *Src1, ExtInstr ExtFunc, |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1397 _orr(T, Src0, Src1RF); | 1394 _orr(T, Src0, Src1RF); |
1398 break; | 1395 break; |
1399 case InstArithmetic::Xor: | 1396 case InstArithmetic::Xor: |
1400 _eor(T, Src0, Src1RF); | 1397 _eor(T, Src0, Src1RF); |
1401 break; | 1398 break; |
1402 } | 1399 } |
1403 _mov(Dest, T); | 1400 _mov(Dest, T); |
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; | 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
1405 } | 1402 } |
1406 | 1403 |
| 1404 namespace { |
| 1405 // NumericOperands is used during arithmetic/icmp lowering for constant folding. |
 | 1406 // It holds the two source operands, and maintains some state as to whether one |
 | 1407 // of them is a constant. If one of the operands is a constant, then it will be |
 | 1408 // stored as the operation's second source, with a bit indicating whether the |
| 1409 // operands were swapped. |
| 1410 // |
| 1411 // The class is split into a base class with operand type-independent methods, |
| 1412 // and a derived, templated class, for each type of operand we want to fold |
| 1413 // constants for: |
| 1414 // |
| 1415 // NumericOperandsBase --> NumericOperands<ConstantFloat> |
| 1416 // --> NumericOperands<ConstantDouble> |
| 1417 // --> NumericOperands<ConstantInt32> |
| 1418 // |
| 1419 // NumericOperands<ConstantInt32> also exposes helper methods for emitting |
| 1420 // inverted/negated immediates. |
| 1421 class NumericOperandsBase { |
| 1422 NumericOperandsBase() = delete; |
| 1423 NumericOperandsBase(const NumericOperandsBase &) = delete; |
| 1424 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete; |
| 1425 |
| 1426 public: |
| 1427 NumericOperandsBase(Operand *S0, Operand *S1) |
| 1428 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)), |
| 1429 Swapped(Src0 == S1 && S0 != S1) { |
| 1430 assert(Src0 != nullptr); |
| 1431 assert(Src1 != nullptr); |
| 1432 assert(Src0 != Src1 || S0 == S1); |
| 1433 } |
| 1434 |
| 1435 bool hasConstOperand() const { |
| 1436 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1); |
| 1437 } |
| 1438 |
| 1439 bool swappedOperands() const { return Swapped; } |
| 1440 |
| 1441 Variable *src0R(TargetARM32 *Target) const { |
| 1442 return legalizeToReg(Target, Src0); |
| 1443 } |
| 1444 |
| 1445 Variable *unswappedSrc0R(TargetARM32 *Target) const { |
| 1446 return legalizeToReg(Target, Swapped ? Src1 : Src0); |
| 1447 } |
| 1448 |
| 1449 Operand *src1RF(TargetARM32 *Target) const { |
| 1450 return legalizeToRegOrFlex(Target, Src1); |
| 1451 } |
| 1452 |
| 1453 Variable *unswappedSrc1R(TargetARM32 *Target) const { |
| 1454 return legalizeToReg(Target, Swapped ? Src0 : Src1); |
| 1455 } |
| 1456 |
| 1457 Operand *unswappedSrc1RF(TargetARM32 *Target) const { |
| 1458 return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1); |
| 1459 } |
| 1460 |
| 1461 protected: |
| 1462 Operand *const Src0; |
| 1463 Operand *const Src1; |
| 1464 const bool Swapped; |
| 1465 |
| 1466 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) { |
| 1467 return Target->legalizeToReg(Src); |
| 1468 } |
| 1469 |
| 1470 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) { |
| 1471 return Target->legalize(Src, |
| 1472 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex); |
| 1473 } |
| 1474 |
| 1475 private: |
| 1476 static Operand *NonConstOperand(Operand *S0, Operand *S1) { |
| 1477 if (!llvm::isa<Constant>(S0)) |
| 1478 return S0; |
| 1479 if (!llvm::isa<Constant>(S1)) |
| 1480 return S1; |
| 1481 if (llvm::isa<ConstantRelocatable>(S1) && |
| 1482 !llvm::isa<ConstantRelocatable>(S0)) |
| 1483 return S1; |
| 1484 return S0; |
| 1485 } |
| 1486 |
| 1487 static Operand *ConstOperand(Operand *S0, Operand *S1) { |
| 1488 if (!llvm::isa<Constant>(S0)) |
| 1489 return S1; |
| 1490 if (!llvm::isa<Constant>(S1)) |
| 1491 return S0; |
| 1492 if (llvm::isa<ConstantRelocatable>(S1) && |
| 1493 !llvm::isa<ConstantRelocatable>(S0)) |
| 1494 return S0; |
| 1495 return S1; |
| 1496 } |
| 1497 }; |
| 1498 |
| 1499 template <typename C> class NumericOperands : public NumericOperandsBase { |
| 1500 NumericOperands() = delete; |
| 1501 NumericOperands(const NumericOperands &) = delete; |
| 1502 NumericOperands &operator=(const NumericOperands &) = delete; |
| 1503 |
| 1504 public: |
| 1505 NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) { |
| 1506 assert(!hasConstOperand() || llvm::isa<C>(this->Src1)); |
| 1507 } |
| 1508 |
| 1509 typename C::PrimType getConstantValue() const { |
| 1510 return llvm::cast<C>(Src1)->getValue(); |
| 1511 } |
| 1512 }; |
| 1513 |
| 1514 using FloatOperands = NumericOperands<ConstantFloat>; |
| 1515 using DoubleOperands = NumericOperands<ConstantDouble>; |
| 1516 |
| 1517 class Int32Operands : public NumericOperands<ConstantInteger32> { |
| 1518 Int32Operands() = delete; |
| 1519 Int32Operands(const Int32Operands &) = delete; |
| 1520 Int32Operands &operator=(const Int32Operands &) = delete; |
| 1521 |
| 1522 public: |
| 1523 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {} |
| 1524 |
| 1525 bool immediateIsFlexEncodable() const { |
| 1526 uint32_t Rotate, Imm8; |
| 1527 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8); |
| 1528 } |
| 1529 |
| 1530 bool negatedImmediateIsFlexEncodable() const { |
| 1531 uint32_t Rotate, Imm8; |
| 1532 return OperandARM32FlexImm::canHoldImm( |
| 1533 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8); |
| 1534 } |
| 1535 |
| 1536 Operand *negatedSrc1F(TargetARM32 *Target) const { |
| 1537 return legalizeToRegOrFlex(Target, |
| 1538 Target->getCtx()->getConstantInt32( |
| 1539 -static_cast<int32_t>(getConstantValue()))); |
| 1540 } |
| 1541 |
| 1542 bool invertedImmediateIsFlexEncodable() const { |
| 1543 uint32_t Rotate, Imm8; |
| 1544 return OperandARM32FlexImm::canHoldImm( |
| 1545 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8); |
| 1546 } |
| 1547 |
| 1548 Operand *invertedSrc1F(TargetARM32 *Target) const { |
| 1549 return legalizeToRegOrFlex(Target, |
| 1550 Target->getCtx()->getConstantInt32( |
| 1551 ~static_cast<uint32_t>(getConstantValue()))); |
| 1552 } |
| 1553 }; |
| 1554 } // end of anonymous namespace |
| 1555 |
| 1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, |
| 1557 Variable *Dest, Operand *Src0, |
| 1558 Operand *Src1) { |
| 1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); |
| 1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); |
| 1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); |
| 1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); |
| 1563 |
| 1564 // These helper-call-involved instructions are lowered in this separate |
| 1565 // switch. This is because we would otherwise assume that we need to |
| 1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
| 1567 // helper calls, and such unused/redundant instructions will fail liveness |
| 1568 // analysis under -Om1 setting. |
| 1569 switch (Op) { |
| 1570 default: |
| 1571 break; |
| 1572 case InstArithmetic::Udiv: |
| 1573 case InstArithmetic::Sdiv: |
| 1574 case InstArithmetic::Urem: |
| 1575 case InstArithmetic::Srem: { |
| 1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
| 1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
| 1578 // register, which will hide a constant source operand. Instead, check |
| 1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
| 1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
| 1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) { |
| 1582 _trap(); |
| 1583 return; |
| 1584 } |
| 1585 } else { |
| 1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this); |
| 1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this); |
| 1588 div0Check(IceType_i64, Src1Lo, Src1Hi); |
| 1589 } |
| 1590 // Technically, ARM has its own aeabi routines, but we can use the |
| 1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
| 1592 // the more standard __moddi3 for rem. |
| 1593 const char *HelperName = ""; |
| 1594 switch (Op) { |
| 1595 default: |
| 1596 llvm::report_fatal_error("Should have only matched div ops."); |
| 1597 break; |
| 1598 case InstArithmetic::Udiv: |
| 1599 HelperName = H_udiv_i64; |
| 1600 break; |
| 1601 case InstArithmetic::Sdiv: |
| 1602 HelperName = H_sdiv_i64; |
| 1603 break; |
| 1604 case InstArithmetic::Urem: |
| 1605 HelperName = H_urem_i64; |
| 1606 break; |
| 1607 case InstArithmetic::Srem: |
| 1608 HelperName = H_srem_i64; |
| 1609 break; |
| 1610 } |
| 1611 constexpr SizeT MaxSrcs = 2; |
| 1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
| 1613 Call->addArg(Src0); |
| 1614 Call->addArg(Src1); |
| 1615 lowerCall(Call); |
| 1616 return; |
| 1617 } |
| 1618 } |
| 1619 |
| 1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1622 Variable *T_Lo = makeReg(DestLo->getType()); |
| 1623 Variable *T_Hi = makeReg(DestHi->getType()); |
| 1624 |
| 1625 switch (Op) { |
| 1626 case InstArithmetic::_num: |
| 1627 llvm::report_fatal_error("Unknown arithmetic operator"); |
| 1628 return; |
| 1629 case InstArithmetic::Add: { |
| 1630 Variable *Src0LoR = SrcsLo.src0R(this); |
| 1631 Operand *Src1LoRF = SrcsLo.src1RF(this); |
| 1632 Variable *Src0HiR = SrcsHi.src0R(this); |
| 1633 Operand *Src1HiRF = SrcsHi.src1RF(this); |
| 1634 _adds(T_Lo, Src0LoR, Src1LoRF); |
| 1635 _mov(DestLo, T_Lo); |
| 1636 _adc(T_Hi, Src0HiR, Src1HiRF); |
| 1637 _mov(DestHi, T_Hi); |
| 1638 return; |
| 1639 } |
| 1640 case InstArithmetic::And: { |
| 1641 Variable *Src0LoR = SrcsLo.src0R(this); |
| 1642 Operand *Src1LoRF = SrcsLo.src1RF(this); |
| 1643 Variable *Src0HiR = SrcsHi.src0R(this); |
| 1644 Operand *Src1HiRF = SrcsHi.src1RF(this); |
| 1645 _and(T_Lo, Src0LoR, Src1LoRF); |
| 1646 _mov(DestLo, T_Lo); |
| 1647 _and(T_Hi, Src0HiR, Src1HiRF); |
| 1648 _mov(DestHi, T_Hi); |
| 1649 return; |
| 1650 } |
| 1651 case InstArithmetic::Or: { |
| 1652 Variable *Src0LoR = SrcsLo.src0R(this); |
| 1653 Operand *Src1LoRF = SrcsLo.src1RF(this); |
| 1654 Variable *Src0HiR = SrcsHi.src0R(this); |
| 1655 Operand *Src1HiRF = SrcsHi.src1RF(this); |
| 1656 _orr(T_Lo, Src0LoR, Src1LoRF); |
| 1657 _mov(DestLo, T_Lo); |
| 1658 _orr(T_Hi, Src0HiR, Src1HiRF); |
| 1659 _mov(DestHi, T_Hi); |
| 1660 return; |
| 1661 } |
| 1662 case InstArithmetic::Xor: { |
| 1663 Variable *Src0LoR = SrcsLo.src0R(this); |
| 1664 Operand *Src1LoRF = SrcsLo.src1RF(this); |
| 1665 Variable *Src0HiR = SrcsHi.src0R(this); |
| 1666 Operand *Src1HiRF = SrcsHi.src1RF(this); |
| 1667 _eor(T_Lo, Src0LoR, Src1LoRF); |
| 1668 _mov(DestLo, T_Lo); |
| 1669 _eor(T_Hi, Src0HiR, Src1HiRF); |
| 1670 _mov(DestHi, T_Hi); |
| 1671 return; |
| 1672 } |
| 1673 case InstArithmetic::Sub: { |
| 1674 Variable *Src0LoR = SrcsLo.src0R(this); |
| 1675 Operand *Src1LoRF = SrcsLo.src1RF(this); |
| 1676 Variable *Src0HiR = SrcsHi.src0R(this); |
| 1677 Operand *Src1HiRF = SrcsHi.src1RF(this); |
| 1678 if (SrcsLo.swappedOperands()) { |
| 1679 _rsbs(T_Lo, Src0LoR, Src1LoRF); |
| 1680 _mov(DestLo, T_Lo); |
| 1681 _rsc(T_Hi, Src0HiR, Src1HiRF); |
| 1682 _mov(DestHi, T_Hi); |
| 1683 } else { |
| 1684 _subs(T_Lo, Src0LoR, Src1LoRF); |
| 1685 _mov(DestLo, T_Lo); |
| 1686 _sbc(T_Hi, Src0HiR, Src1HiRF); |
| 1687 _mov(DestHi, T_Hi); |
| 1688 } |
| 1689 return; |
| 1690 } |
| 1691 case InstArithmetic::Mul: { |
| 1692 // GCC 4.8 does: |
| 1693 // a=b*c ==> |
| 1694 // t_acc =(mul) (b.lo * c.hi) |
| 1695 // t_acc =(mla) (c.lo * b.hi) + t_acc |
| 1696 // t.hi,t.lo =(umull) b.lo * c.lo |
| 1697 // t.hi += t_acc |
| 1698 // a.lo = t.lo |
| 1699 // a.hi = t.hi |
| 1700 // |
| 1701 // LLVM does: |
| 1702 // t.hi,t.lo =(umull) b.lo * c.lo |
| 1703 // t.hi =(mla) (b.lo * c.hi) + t.hi |
| 1704 // t.hi =(mla) (b.hi * c.lo) + t.hi |
| 1705 // a.lo = t.lo |
| 1706 // a.hi = t.hi |
| 1707 // |
| 1708 // LLVM's lowering has fewer instructions, but more register pressure: |
| 1709 // t.lo is live from beginning to end, while GCC delays the two-dest |
| 1710 // instruction till the end, and kills c.hi immediately. |
| 1711 Variable *T_Acc = makeReg(IceType_i32); |
| 1712 Variable *T_Acc1 = makeReg(IceType_i32); |
| 1713 Variable *T_Hi1 = makeReg(IceType_i32); |
| 1714 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
| 1715 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
| 1716 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
| 1717 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this); |
| 1718 _mul(T_Acc, Src0RLo, Src1RHi); |
| 1719 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
| 1720 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
| 1721 _add(T_Hi, T_Hi1, T_Acc1); |
| 1722 _mov(DestLo, T_Lo); |
| 1723 _mov(DestHi, T_Hi); |
| 1724 return; |
| 1725 } |
| 1726 case InstArithmetic::Shl: { |
| 1727 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
| 1728 Variable *Src0RLo = SrcsLo.src0R(this); |
| 1729 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
| 1730 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; |
| 1731 if (ShAmtImm == 0) { |
| 1732 _mov(DestLo, Src0RLo); |
| 1733 _mov(DestHi, SrcsHi.src0R(this)); |
| 1734 return; |
| 1735 } |
| 1736 |
| 1737 if (ShAmtImm >= 32) { |
| 1738 if (ShAmtImm == 32) { |
| 1739 _mov(DestHi, Src0RLo); |
| 1740 } else { |
| 1741 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
| 1742 Legal_Reg | Legal_Flex); |
| 1743 _lsl(T_Hi, Src0RLo, ShAmtOp); |
| 1744 _mov(DestHi, T_Hi); |
| 1745 } |
| 1746 |
| 1747 Operand *_0 = |
| 1748 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
| 1749 _mov(T_Lo, _0); |
| 1750 _mov(DestLo, T_Lo); |
| 1751 return; |
| 1752 } |
| 1753 |
| 1754 Variable *Src0RHi = SrcsHi.src0R(this); |
| 1755 Operand *ShAmtOp = |
| 1756 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
| 1757 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
| 1758 Legal_Reg | Legal_Flex); |
| 1759 _lsl(T_Hi, Src0RHi, ShAmtOp); |
| 1760 _orr(T_Hi, T_Hi, |
| 1761 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| 1762 OperandARM32::LSR, ComplShAmtOp)); |
| 1763 _mov(DestHi, T_Hi); |
| 1764 |
| 1765 _lsl(T_Lo, Src0RLo, ShAmtOp); |
| 1766 _mov(DestLo, T_Lo); |
| 1767 return; |
| 1768 } |
| 1769 |
| 1770 // a=b<<c ==> |
| 1771 // pnacl-llc does: |
| 1772 // mov t_b.lo, b.lo |
| 1773 // mov t_b.hi, b.hi |
| 1774 // mov t_c.lo, c.lo |
| 1775 // rsb T0, t_c.lo, #32 |
| 1776 // lsr T1, t_b.lo, T0 |
| 1777 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo |
| 1778 // sub T2, t_c.lo, #32 |
| 1779 // cmp T2, #0 |
| 1780 // lslge t_a.hi, t_b.lo, T2 |
| 1781 // lsl t_a.lo, t_b.lo, t_c.lo |
| 1782 // mov a.lo, t_a.lo |
| 1783 // mov a.hi, t_a.hi |
| 1784 // |
| 1785 // GCC 4.8 does: |
| 1786 // sub t_c1, c.lo, #32 |
| 1787 // lsl t_hi, b.hi, c.lo |
| 1788 // orr t_hi, t_hi, b.lo, lsl t_c1 |
| 1789 // rsb t_c2, c.lo, #32 |
| 1790 // orr t_hi, t_hi, b.lo, lsr t_c2 |
| 1791 // lsl t_lo, b.lo, c.lo |
| 1792 // a.lo = t_lo |
| 1793 // a.hi = t_hi |
| 1794 // |
| 1795 // These are incompatible, therefore we mimic pnacl-llc. |
| 1796 // Can be strength-reduced for constant-shifts, but we don't do that for |
| 1797 // now. |
| 1798 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
| 1799 // ARM, shifts only take the lower 8 bits of the shift register, and |
| 1800 // saturate to the range 0-32, so the negative value will saturate to 32. |
| 1801 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
| 1802 Operand *_0 = |
| 1803 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
| 1804 Variable *T0 = makeReg(IceType_i32); |
| 1805 Variable *T1 = makeReg(IceType_i32); |
| 1806 Variable *T2 = makeReg(IceType_i32); |
| 1807 Variable *TA_Hi = makeReg(IceType_i32); |
| 1808 Variable *TA_Lo = makeReg(IceType_i32); |
 | 1809 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
| 1810 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
| 1811 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
| 1812 _rsb(T0, Src1RLo, _32); |
| 1813 _lsr(T1, Src0RLo, T0); |
| 1814 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1815 OperandARM32::LSL, Src1RLo)); |
| 1816 _sub(T2, Src1RLo, _32); |
| 1817 _cmp(T2, _0); |
| 1818 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
| 1819 _set_dest_redefined(); |
| 1820 _lsl(TA_Lo, Src0RLo, Src1RLo); |
| 1821 _mov(DestLo, TA_Lo); |
| 1822 _mov(DestHi, TA_Hi); |
| 1823 return; |
| 1824 } |
| 1825 case InstArithmetic::Lshr: |
| 1826 case InstArithmetic::Ashr: { |
| 1827 const bool ASR = Op == InstArithmetic::Ashr; |
| 1828 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
| 1829 Variable *Src0RHi = SrcsHi.src0R(this); |
| 1830 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
| 1831 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; |
| 1832 if (ShAmtImm == 0) { |
| 1833 _mov(DestHi, Src0RHi); |
| 1834 _mov(DestLo, SrcsLo.src0R(this)); |
| 1835 return; |
| 1836 } |
| 1837 |
| 1838 if (ShAmtImm >= 32) { |
| 1839 if (ShAmtImm == 32) { |
| 1840 _mov(DestLo, Src0RHi); |
| 1841 } else { |
| 1842 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
| 1843 Legal_Reg | Legal_Flex); |
| 1844 if (ASR) { |
| 1845 _asr(T_Lo, Src0RHi, ShAmtOp); |
| 1846 } else { |
| 1847 _lsr(T_Lo, Src0RHi, ShAmtOp); |
| 1848 } |
| 1849 _mov(DestLo, T_Lo); |
| 1850 } |
| 1851 |
| 1852 if (ASR) { |
 | 1853 Operand *_31 = legalize(Ctx->getConstantInt32(31), |
| 1854 Legal_Reg | Legal_Flex); |
| 1855 _asr(T_Hi, Src0RHi, _31); |
| 1856 } else { |
| 1857 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), |
| 1858 Legal_Reg | Legal_Flex); |
| 1859 _mov(T_Hi, _0); |
| 1860 } |
| 1861 _mov(DestHi, T_Hi); |
| 1862 return; |
| 1863 } |
| 1864 |
| 1865 Variable *Src0RLo = SrcsLo.src0R(this); |
| 1866 Operand *ShAmtOp = |
| 1867 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
| 1868 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
| 1869 Legal_Reg | Legal_Flex); |
| 1870 _lsr(T_Lo, Src0RLo, ShAmtOp); |
| 1871 _orr(T_Lo, T_Lo, |
| 1872 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1873 OperandARM32::LSL, ComplShAmtOp)); |
| 1874 _mov(DestLo, T_Lo); |
| 1875 |
| 1876 if (ASR) { |
| 1877 _asr(T_Hi, Src0RHi, ShAmtOp); |
| 1878 } else { |
| 1879 _lsr(T_Hi, Src0RHi, ShAmtOp); |
| 1880 } |
| 1881 _mov(DestHi, T_Hi); |
| 1882 return; |
| 1883 } |
| 1884 |
| 1885 // a=b>>c |
| 1886 // pnacl-llc does: |
| 1887 // mov t_b.lo, b.lo |
| 1888 // mov t_b.hi, b.hi |
| 1889 // mov t_c.lo, c.lo |
| 1890 // lsr T0, t_b.lo, t_c.lo |
| 1891 // rsb T1, t_c.lo, #32 |
| 1892 // orr t_a.lo, T0, t_b.hi, lsl T1 |
| 1893 // sub T2, t_c.lo, #32 |
| 1894 // cmp T2, #0 |
| 1895 // [al]srge t_a.lo, t_b.hi, T2 |
| 1896 // [al]sr t_a.hi, t_b.hi, t_c.lo |
| 1897 // mov a.lo, t_a.lo |
| 1898 // mov a.hi, t_a.hi |
| 1899 // |
| 1900 // GCC 4.8 does (lsr): |
| 1901 // rsb t_c1, c.lo, #32 |
| 1902 // lsr t_lo, b.lo, c.lo |
| 1903 // orr t_lo, t_lo, b.hi, lsl t_c1 |
| 1904 // sub t_c2, c.lo, #32 |
| 1905 // orr t_lo, t_lo, b.hi, lsr t_c2 |
| 1906 // lsr t_hi, b.hi, c.lo |
| 1907 // mov a.lo, t_lo |
| 1908 // mov a.hi, t_hi |
| 1909 // |
| 1910 // These are incompatible, therefore we mimic pnacl-llc. |
| 1911 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
| 1912 Operand *_0 = |
| 1913 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
| 1914 Variable *T0 = makeReg(IceType_i32); |
| 1915 Variable *T1 = makeReg(IceType_i32); |
| 1916 Variable *T2 = makeReg(IceType_i32); |
| 1917 Variable *TA_Lo = makeReg(IceType_i32); |
| 1918 Variable *TA_Hi = makeReg(IceType_i32); |
| 1919 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
| 1920 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
| 1921 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
| 1922 _lsr(T0, Src0RLo, Src1RLo); |
| 1923 _rsb(T1, Src1RLo, _32); |
| 1924 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| 1925 OperandARM32::LSL, T1)); |
| 1926 _sub(T2, Src1RLo, _32); |
| 1927 _cmp(T2, _0); |
| 1928 if (ASR) { |
| 1929 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| 1930 _set_dest_redefined(); |
| 1931 _asr(TA_Hi, Src0RHi, Src1RLo); |
| 1932 } else { |
| 1933 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| 1934 _set_dest_redefined(); |
| 1935 _lsr(TA_Hi, Src0RHi, Src1RLo); |
| 1936 } |
| 1937 _mov(DestLo, TA_Lo); |
| 1938 _mov(DestHi, TA_Hi); |
| 1939 return; |
| 1940 } |
| 1941 case InstArithmetic::Fadd: |
| 1942 case InstArithmetic::Fsub: |
| 1943 case InstArithmetic::Fmul: |
| 1944 case InstArithmetic::Fdiv: |
| 1945 case InstArithmetic::Frem: |
| 1946 llvm::report_fatal_error("FP instruction with i64 type"); |
| 1947 return; |
| 1948 case InstArithmetic::Udiv: |
| 1949 case InstArithmetic::Sdiv: |
| 1950 case InstArithmetic::Urem: |
| 1951 case InstArithmetic::Srem: |
| 1952 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " |
| 1953 "should have already been handled before"); |
| 1954 return; |
| 1955 } |
| 1956 } |
| 1957 |
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1958 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
1408 Variable *Dest = Inst->getDest(); | 1959 Variable *Dest = Inst->getDest(); |
1409 if (Dest->getType() == IceType_i1) { | 1960 if (Dest->getType() == IceType_i1) { |
1410 lowerInt1Arithmetic(Inst); | 1961 lowerInt1Arithmetic(Inst); |
1411 return; | 1962 return; |
1412 } | 1963 } |
1413 | 1964 |
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to | |
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible | |
1416 // instruction. E.g., reverse subtract with immediate, register vs register, | |
1417 // immediate. | |
1418 // Or it may be the case that the operands aren't swapped, but the bits can | |
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) | |
1420 // instead of AND for some masks. | |
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1965 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1966 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
1423 if (Dest->getType() == IceType_i64) { | 1967 if (Dest->getType() == IceType_i64) { |
1424 // These helper-call-involved instructions are lowered in this separate | 1968 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
1425 // switch. This is because we would otherwise assume that we need to | 1969 return; |
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | 1970 } |
1427 // helper calls, and such unused/redundant instructions will fail liveness | 1971 |
1428 // analysis under -Om1 setting. | 1972 if (isVectorType(Dest->getType())) { |
1429 switch (Inst->getOp()) { | |
1430 default: | |
1431 break; | |
1432 case InstArithmetic::Udiv: | |
1433 case InstArithmetic::Sdiv: | |
1434 case InstArithmetic::Urem: | |
1435 case InstArithmetic::Srem: { | |
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
1438 // register, which will hide a constant source operand. Instead, check | |
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
1441 if (C64->getValue() == 0) { | |
1442 _trap(); | |
1443 return; | |
1444 } | |
1445 } else { | |
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
1448 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
1449 } | |
1450 // Technically, ARM has their own aeabi routines, but we can use the | |
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
1452 // the more standard __moddi3 for rem. | |
1453 const char *HelperName = ""; | |
1454 switch (Inst->getOp()) { | |
1455 default: | |
1456 llvm_unreachable("Should have only matched div ops."); | |
1457 break; | |
1458 case InstArithmetic::Udiv: | |
1459 HelperName = H_udiv_i64; | |
1460 break; | |
1461 case InstArithmetic::Sdiv: | |
1462 HelperName = H_sdiv_i64; | |
1463 break; | |
1464 case InstArithmetic::Urem: | |
1465 HelperName = H_urem_i64; | |
1466 break; | |
1467 case InstArithmetic::Srem: | |
1468 HelperName = H_srem_i64; | |
1469 break; | |
1470 } | |
1471 constexpr SizeT MaxSrcs = 2; | |
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
1473 Call->addArg(Src0); | |
1474 Call->addArg(Src1); | |
1475 lowerCall(Call); | |
1476 return; | |
1477 } | |
1478 } | |
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0)); | |
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1483 Operand *Src1Lo = loOperand(Src1); | |
1484 Operand *Src1Hi = hiOperand(Src1); | |
1485 Variable *T_Lo = makeReg(DestLo->getType()); | |
1486 Variable *T_Hi = makeReg(DestHi->getType()); | |
1487 switch (Inst->getOp()) { | |
1488 case InstArithmetic::_num: | |
1489 llvm_unreachable("Unknown arithmetic operator"); | |
1490 return; | |
1491 case InstArithmetic::Add: | |
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1494 _adds(T_Lo, Src0RLo, Src1Lo); | |
1495 _mov(DestLo, T_Lo); | |
1496 _adc(T_Hi, Src0RHi, Src1Hi); | |
1497 _mov(DestHi, T_Hi); | |
1498 return; | |
1499 case InstArithmetic::And: | |
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1502 _and(T_Lo, Src0RLo, Src1Lo); | |
1503 _mov(DestLo, T_Lo); | |
1504 _and(T_Hi, Src0RHi, Src1Hi); | |
1505 _mov(DestHi, T_Hi); | |
1506 return; | |
1507 case InstArithmetic::Or: | |
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1510 _orr(T_Lo, Src0RLo, Src1Lo); | |
1511 _mov(DestLo, T_Lo); | |
1512 _orr(T_Hi, Src0RHi, Src1Hi); | |
1513 _mov(DestHi, T_Hi); | |
1514 return; | |
1515 case InstArithmetic::Xor: | |
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1518 _eor(T_Lo, Src0RLo, Src1Lo); | |
1519 _mov(DestLo, T_Lo); | |
1520 _eor(T_Hi, Src0RHi, Src1Hi); | |
1521 _mov(DestHi, T_Hi); | |
1522 return; | |
1523 case InstArithmetic::Sub: | |
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1526 _subs(T_Lo, Src0RLo, Src1Lo); | |
1527 _mov(DestLo, T_Lo); | |
1528 _sbc(T_Hi, Src0RHi, Src1Hi); | |
1529 _mov(DestHi, T_Hi); | |
1530 return; | |
1531 case InstArithmetic::Mul: { | |
1532 // GCC 4.8 does: | |
1533 // a=b*c ==> | |
1534 // t_acc =(mul) (b.lo * c.hi) | |
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
1536 // t.hi,t.lo =(umull) b.lo * c.lo | |
1537 // t.hi += t_acc | |
1538 // a.lo = t.lo | |
1539 // a.hi = t.hi | |
1540 // | |
1541 // LLVM does: | |
1542 // t.hi,t.lo =(umull) b.lo * c.lo | |
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
1545 // a.lo = t.lo | |
1546 // a.hi = t.hi | |
1547 // | |
1548 // LLVM's lowering has fewer instructions, but more register pressure: | |
1549 // t.lo is live from beginning to end, while GCC delays the two-dest | |
1550 // instruction till the end, and kills c.hi immediately. | |
1551 Variable *T_Acc = makeReg(IceType_i32); | |
1552 Variable *T_Acc1 = makeReg(IceType_i32); | |
1553 Variable *T_Hi1 = makeReg(IceType_i32); | |
1554 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1555 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
1556 _mul(T_Acc, Src0RLo, Src1RHi); | |
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
1559 _add(T_Hi, T_Hi1, T_Acc1); | |
1560 _mov(DestLo, T_Lo); | |
1561 _mov(DestHi, T_Hi); | |
1562 return; | |
1563 } | |
1564 case InstArithmetic::Shl: { | |
1565 // a=b<<c ==> | |
1566 // pnacl-llc does: | |
1567 // mov t_b.lo, b.lo | |
1568 // mov t_b.hi, b.hi | |
1569 // mov t_c.lo, c.lo | |
1570 // rsb T0, t_c.lo, #32 | |
1571 // lsr T1, t_b.lo, T0 | |
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
1573 // sub T2, t_c.lo, #32 | |
1574 // cmp T2, #0 | |
1575 // lslge t_a.hi, t_b.lo, T2 | |
1576 // lsl t_a.lo, t_b.lo, t_c.lo | |
1577 // mov a.lo, t_a.lo | |
1578 // mov a.hi, t_a.hi | |
1579 // | |
1580 // GCC 4.8 does: | |
1581 // sub t_c1, c.lo, #32 | |
1582 // lsl t_hi, b.hi, c.lo | |
1583 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
1584 // rsb t_c2, c.lo, #32 | |
1585 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
1586 // lsl t_lo, b.lo, c.lo | |
1587 // a.lo = t_lo | |
1588 // a.hi = t_hi | |
1589 // | |
1590 // These are incompatible, therefore we mimic pnacl-llc. | |
1591 // Can be strength-reduced for constant-shifts, but we don't do that for | |
1592 // now. | |
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
1594 // ARM, shifts only take the lower 8 bits of the shift register, and | |
1595 // saturate to the range 0-32, so the negative value will saturate to 32. | |
1596 Constant *_32 = Ctx->getConstantInt32(32); | |
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1598 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1599 Variable *T0 = makeReg(IceType_i32); | |
1600 Variable *T1 = makeReg(IceType_i32); | |
1601 Variable *T2 = makeReg(IceType_i32); | |
1602 Variable *TA_Hi = makeReg(IceType_i32); | |
1603 Variable *TA_Lo = makeReg(IceType_i32); | |
1604 _rsb(T0, Src1RLo, _32); | |
1605 _lsr(T1, Src0RLo, T0); | |
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1607 OperandARM32::LSL, Src1RLo)); | |
1608 _sub(T2, Src1RLo, _32); | |
1609 _cmp(T2, _0); | |
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
1611 _set_dest_redefined(); | |
1612 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
1613 _mov(DestLo, TA_Lo); | |
1614 _mov(DestHi, TA_Hi); | |
1615 return; | |
1616 } | |
1617 case InstArithmetic::Lshr: | |
1618 case InstArithmetic::Ashr: { | |
1619 // a=b>>c | |
1620 // pnacl-llc does: | |
1621 // mov t_b.lo, b.lo | |
1622 // mov t_b.hi, b.hi | |
1623 // mov t_c.lo, c.lo | |
1624 // lsr T0, t_b.lo, t_c.lo | |
1625 // rsb T1, t_c.lo, #32 | |
1626 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
1627 // sub T2, t_c.lo, #32 | |
1628 // cmp T2, #0 | |
1629 // [al]srge t_a.lo, t_b.hi, T2 | |
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
1631 // mov a.lo, t_a.lo | |
1632 // mov a.hi, t_a.hi | |
1633 // | |
1634 // GCC 4.8 does (lsr): | |
1635 // rsb t_c1, c.lo, #32 | |
1636 // lsr t_lo, b.lo, c.lo | |
1637 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
1638 // sub t_c2, c.lo, #32 | |
1639 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
1640 // lsr t_hi, b.hi, c.lo | |
1641 // mov a.lo, t_lo | |
1642 // mov a.hi, t_hi | |
1643 // | |
1644 // These are incompatible, therefore we mimic pnacl-llc. | |
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | |
1646 Constant *_32 = Ctx->getConstantInt32(32); | |
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1648 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1649 Variable *T0 = makeReg(IceType_i32); | |
1650 Variable *T1 = makeReg(IceType_i32); | |
1651 Variable *T2 = makeReg(IceType_i32); | |
1652 Variable *TA_Lo = makeReg(IceType_i32); | |
1653 Variable *TA_Hi = makeReg(IceType_i32); | |
1654 _lsr(T0, Src0RLo, Src1RLo); | |
1655 _rsb(T1, Src1RLo, _32); | |
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1657 OperandARM32::LSL, T1)); | |
1658 _sub(T2, Src1RLo, _32); | |
1659 _cmp(T2, _0); | |
1660 if (IsAshr) { | |
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1662 _set_dest_redefined(); | |
1663 _asr(TA_Hi, Src0RHi, Src1RLo); | |
1664 } else { | |
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1666 _set_dest_redefined(); | |
1667 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
1668 } | |
1669 _mov(DestLo, TA_Lo); | |
1670 _mov(DestHi, TA_Hi); | |
1671 return; | |
1672 } | |
1673 case InstArithmetic::Fadd: | |
1674 case InstArithmetic::Fsub: | |
1675 case InstArithmetic::Fmul: | |
1676 case InstArithmetic::Fdiv: | |
1677 case InstArithmetic::Frem: | |
1678 llvm_unreachable("FP instruction with i64 type"); | |
1679 return; | |
1680 case InstArithmetic::Udiv: | |
1681 case InstArithmetic::Sdiv: | |
1682 case InstArithmetic::Urem: | |
1683 case InstArithmetic::Srem: | |
1684 llvm_unreachable("Call-helper-involved instruction for i64 type " | |
1685 "should have already been handled before"); | |
1686 return; | |
1687 } | |
1688 return; | |
1689 } else if (isVectorType(Dest->getType())) { | |
1690 // Add a fake def to keep liveness consistent in the meantime. | 1973 // Add a fake def to keep liveness consistent in the meantime. |
1691 Variable *T = makeReg(Dest->getType()); | 1974 Variable *T = makeReg(Dest->getType()); |
1692 Context.insert(InstFakeDef::create(Func, T)); | 1975 Context.insert(InstFakeDef::create(Func, T)); |
1693 _mov(Dest, T); | 1976 _mov(Dest, T); |
1694 UnimplementedError(Func->getContext()->getFlags()); | 1977 UnimplementedError(Func->getContext()->getFlags()); |
1695 return; | 1978 return; |
1696 } | 1979 } |
| 1980 |
1697 // Dest->getType() is a non-i64 scalar. | 1981 // Dest->getType() is a non-i64 scalar. |
1698 Variable *Src0R = legalizeToReg(Src0); | |
1699 Variable *T = makeReg(Dest->getType()); | 1982 Variable *T = makeReg(Dest->getType()); |
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect | 1983 |
| 1984 // * Handle div/rem separately. They require a non-legalized Src1 to inspect |
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more | 1985 // whether or not Src1 is a non-zero constant. Once legalized it is more |
1702 // difficult to determine (constant may be moved to a register). | 1986 // difficult to determine (constant may be moved to a register). |
| 1987 // * Handle floating point arithmetic separately: they require Src1 to be |
| 1988 // legalized to a register. |
1703 switch (Inst->getOp()) { | 1989 switch (Inst->getOp()) { |
1704 default: | 1990 default: |
1705 break; | 1991 break; |
1706 case InstArithmetic::Udiv: { | 1992 case InstArithmetic::Udiv: { |
1707 constexpr bool NotRemainder = false; | 1993 constexpr bool NotRemainder = false; |
| 1994 Variable *Src0R = legalizeToReg(Src0); |
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 1995 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
1709 H_udiv_i32, NotRemainder); | 1996 H_udiv_i32, NotRemainder); |
1710 return; | 1997 return; |
1711 } | 1998 } |
1712 case InstArithmetic::Sdiv: { | 1999 case InstArithmetic::Sdiv: { |
1713 constexpr bool NotRemainder = false; | 2000 constexpr bool NotRemainder = false; |
| 2001 Variable *Src0R = legalizeToReg(Src0); |
1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 2002 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
1715 H_sdiv_i32, NotRemainder); | 2003 H_sdiv_i32, NotRemainder); |
1716 return; | 2004 return; |
1717 } | 2005 } |
1718 case InstArithmetic::Urem: { | 2006 case InstArithmetic::Urem: { |
1719 constexpr bool IsRemainder = true; | 2007 constexpr bool IsRemainder = true; |
| 2008 Variable *Src0R = legalizeToReg(Src0); |
1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 2009 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
1721 H_urem_i32, IsRemainder); | 2010 H_urem_i32, IsRemainder); |
1722 return; | 2011 return; |
1723 } | 2012 } |
1724 case InstArithmetic::Srem: { | 2013 case InstArithmetic::Srem: { |
1725 constexpr bool IsRemainder = true; | 2014 constexpr bool IsRemainder = true; |
| 2015 Variable *Src0R = legalizeToReg(Src0); |
1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 2016 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
1727 H_srem_i32, IsRemainder); | 2017 H_srem_i32, IsRemainder); |
1728 return; | 2018 return; |
1729 } | 2019 } |
1730 case InstArithmetic::Frem: { | 2020 case InstArithmetic::Frem: { |
1731 const SizeT MaxSrcs = 2; | 2021 constexpr SizeT MaxSrcs = 2; |
| 2022 Variable *Src0R = legalizeToReg(Src0); |
1732 Type Ty = Dest->getType(); | 2023 Type Ty = Dest->getType(); |
1733 InstCall *Call = makeHelperCall( | 2024 InstCall *Call = makeHelperCall( |
1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 2025 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
1735 Call->addArg(Src0R); | 2026 Call->addArg(Src0R); |
1736 Call->addArg(Src1); | 2027 Call->addArg(Src1); |
1737 lowerCall(Call); | 2028 lowerCall(Call); |
1738 return; | 2029 return; |
1739 } | 2030 } |
1740 } | |
1741 | |
1742 // Handle floating point arithmetic separately: they require Src1 to be | |
1743 // legalized to a register. | |
1744 switch (Inst->getOp()) { | |
1745 default: | |
1746 break; | |
1747 case InstArithmetic::Fadd: { | 2031 case InstArithmetic::Fadd: { |
| 2032 Variable *Src0R = legalizeToReg(Src0); |
1748 Variable *Src1R = legalizeToReg(Src1); | 2033 Variable *Src1R = legalizeToReg(Src1); |
1749 _vadd(T, Src0R, Src1R); | 2034 _vadd(T, Src0R, Src1R); |
1750 _mov(Dest, T); | 2035 _mov(Dest, T); |
1751 return; | 2036 return; |
1752 } | 2037 } |
1753 case InstArithmetic::Fsub: { | 2038 case InstArithmetic::Fsub: { |
| 2039 Variable *Src0R = legalizeToReg(Src0); |
1754 Variable *Src1R = legalizeToReg(Src1); | 2040 Variable *Src1R = legalizeToReg(Src1); |
1755 _vsub(T, Src0R, Src1R); | 2041 _vsub(T, Src0R, Src1R); |
1756 _mov(Dest, T); | 2042 _mov(Dest, T); |
1757 return; | 2043 return; |
1758 } | 2044 } |
1759 case InstArithmetic::Fmul: { | 2045 case InstArithmetic::Fmul: { |
| 2046 Variable *Src0R = legalizeToReg(Src0); |
1760 Variable *Src1R = legalizeToReg(Src1); | 2047 Variable *Src1R = legalizeToReg(Src1); |
1761 _vmul(T, Src0R, Src1R); | 2048 _vmul(T, Src0R, Src1R); |
1762 _mov(Dest, T); | 2049 _mov(Dest, T); |
1763 return; | 2050 return; |
1764 } | 2051 } |
1765 case InstArithmetic::Fdiv: { | 2052 case InstArithmetic::Fdiv: { |
| 2053 Variable *Src0R = legalizeToReg(Src0); |
1766 Variable *Src1R = legalizeToReg(Src1); | 2054 Variable *Src1R = legalizeToReg(Src1); |
1767 _vdiv(T, Src0R, Src1R); | 2055 _vdiv(T, Src0R, Src1R); |
1768 _mov(Dest, T); | 2056 _mov(Dest, T); |
1769 return; | 2057 return; |
1770 } | 2058 } |
1771 } | 2059 } |
1772 | 2060 |
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 2061 // Handle everything else here. |
| 2062 Int32Operands Srcs(Src0, Src1); |
1774 switch (Inst->getOp()) { | 2063 switch (Inst->getOp()) { |
1775 case InstArithmetic::_num: | 2064 case InstArithmetic::_num: |
1776 llvm_unreachable("Unknown arithmetic operator"); | 2065 llvm::report_fatal_error("Unknown arithmetic operator"); |
1777 return; | 2066 return; |
1778 case InstArithmetic::Add: | 2067 case InstArithmetic::Add: { |
| 2068 if (Srcs.hasConstOperand()) { |
| 2069 if (!Srcs.immediateIsFlexEncodable() && |
| 2070 Srcs.negatedImmediateIsFlexEncodable()) { |
| 2071 Variable *Src0R = Srcs.src0R(this); |
| 2072 Operand *Src1F = Srcs.negatedSrc1F(this); |
| 2073 if (!Srcs.swappedOperands()) { |
| 2074 _sub(T, Src0R, Src1F); |
| 2075 } else { |
| 2076 _rsb(T, Src0R, Src1F); |
| 2077 } |
| 2078 _mov(Dest, T); |
| 2079 return; |
| 2080 } |
| 2081 } |
| 2082 Variable *Src0R = Srcs.src0R(this); |
| 2083 Operand *Src1RF = Srcs.src1RF(this); |
1779 _add(T, Src0R, Src1RF); | 2084 _add(T, Src0R, Src1RF); |
1780 _mov(Dest, T); | 2085 _mov(Dest, T); |
1781 return; | 2086 return; |
1782 case InstArithmetic::And: | 2087 } |
| 2088 case InstArithmetic::And: { |
| 2089 if (Srcs.hasConstOperand()) { |
| 2090 if (!Srcs.immediateIsFlexEncodable() && |
| 2091 Srcs.invertedImmediateIsFlexEncodable()) { |
| 2092 Variable *Src0R = Srcs.src0R(this); |
| 2093 Operand *Src1F = Srcs.invertedSrc1F(this); |
| 2094 _bic(T, Src0R, Src1F); |
| 2095 _mov(Dest, T); |
| 2096 return; |
| 2097 } |
| 2098 } |
| 2099 Variable *Src0R = Srcs.src0R(this); |
| 2100 Operand *Src1RF = Srcs.src1RF(this); |
1783 _and(T, Src0R, Src1RF); | 2101 _and(T, Src0R, Src1RF); |
1784 _mov(Dest, T); | 2102 _mov(Dest, T); |
1785 return; | 2103 return; |
1786 case InstArithmetic::Or: | 2104 } |
| 2105 case InstArithmetic::Or: { |
| 2106 Variable *Src0R = Srcs.src0R(this); |
| 2107 Operand *Src1RF = Srcs.src1RF(this); |
1787 _orr(T, Src0R, Src1RF); | 2108 _orr(T, Src0R, Src1RF); |
1788 _mov(Dest, T); | 2109 _mov(Dest, T); |
1789 return; | 2110 return; |
1790 case InstArithmetic::Xor: | 2111 } |
| 2112 case InstArithmetic::Xor: { |
| 2113 Variable *Src0R = Srcs.src0R(this); |
| 2114 Operand *Src1RF = Srcs.src1RF(this); |
1791 _eor(T, Src0R, Src1RF); | 2115 _eor(T, Src0R, Src1RF); |
1792 _mov(Dest, T); | 2116 _mov(Dest, T); |
1793 return; | 2117 return; |
1794 case InstArithmetic::Sub: | 2118 } |
1795 _sub(T, Src0R, Src1RF); | 2119 case InstArithmetic::Sub: { |
| 2120 if (Srcs.hasConstOperand()) { |
| 2121 Variable *Src0R = Srcs.src0R(this); |
| 2122 if (Srcs.immediateIsFlexEncodable()) { |
| 2123 Operand *Src1RF = Srcs.src1RF(this); |
| 2124 if (Srcs.swappedOperands()) { |
| 2125 _rsb(T, Src0R, Src1RF); |
| 2126 } else { |
| 2127 _sub(T, Src0R, Src1RF); |
| 2128 } |
| 2129 _mov(Dest, T); |
| 2130 return; |
| 2131 } |
| 2132 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { |
| 2133 Operand *Src1F = Srcs.negatedSrc1F(this); |
| 2134 _add(T, Src0R, Src1F); |
| 2135 _mov(Dest, T); |
| 2136 return; |
| 2137 } |
| 2138 } |
| 2139 Variable *Src0R = Srcs.unswappedSrc0R(this); |
| 2140 Variable *Src1R = Srcs.unswappedSrc1R(this); |
| 2141 _sub(T, Src0R, Src1R); |
1796 _mov(Dest, T); | 2142 _mov(Dest, T); |
1797 return; | 2143 return; |
| 2144 } |
1798 case InstArithmetic::Mul: { | 2145 case InstArithmetic::Mul: { |
1799 Variable *Src1R = legalizeToReg(Src1RF); | 2146 Variable *Src0R = Srcs.unswappedSrc0R(this); |
| 2147 Variable *Src1R = Srcs.unswappedSrc1R(this); |
1800 _mul(T, Src0R, Src1R); | 2148 _mul(T, Src0R, Src1R); |
1801 _mov(Dest, T); | 2149 _mov(Dest, T); |
1802 return; | 2150 return; |
1803 } | 2151 } |
1804 case InstArithmetic::Shl: | 2152 case InstArithmetic::Shl: { |
1805 _lsl(T, Src0R, Src1RF); | 2153 Variable *Src0R = Srcs.unswappedSrc0R(this); |
| 2154 Operand *Src1R = Srcs.unswappedSrc1RF(this); |
| 2155 _lsl(T, Src0R, Src1R); |
1806 _mov(Dest, T); | 2156 _mov(Dest, T); |
1807 return; | 2157 return; |
1808 case InstArithmetic::Lshr: | 2158 } |
| 2159 case InstArithmetic::Lshr: { |
| 2160 Variable *Src0R = Srcs.unswappedSrc0R(this); |
1809 if (Dest->getType() != IceType_i32) { | 2161 if (Dest->getType() != IceType_i32) { |
1810 _uxt(Src0R, Src0R); | 2162 _uxt(Src0R, Src0R); |
1811 } | 2163 } |
1812 _lsr(T, Src0R, Src1RF); | 2164 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
1813 _mov(Dest, T); | 2165 _mov(Dest, T); |
1814 return; | 2166 return; |
1815 case InstArithmetic::Ashr: | 2167 } |
| 2168 case InstArithmetic::Ashr: { |
| 2169 Variable *Src0R = Srcs.unswappedSrc0R(this); |
1816 if (Dest->getType() != IceType_i32) { | 2170 if (Dest->getType() != IceType_i32) { |
1817 _sxt(Src0R, Src0R); | 2171 _sxt(Src0R, Src0R); |
1818 } | 2172 } |
1819 _asr(T, Src0R, Src1RF); | 2173 _asr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
1820 _mov(Dest, T); | 2174 _mov(Dest, T); |
1821 return; | 2175 return; |
| 2176 } |
1822 case InstArithmetic::Udiv: | 2177 case InstArithmetic::Udiv: |
1823 case InstArithmetic::Sdiv: | 2178 case InstArithmetic::Sdiv: |
1824 case InstArithmetic::Urem: | 2179 case InstArithmetic::Urem: |
1825 case InstArithmetic::Srem: | 2180 case InstArithmetic::Srem: |
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); | 2181 llvm::report_fatal_error( |
| 2182 "Integer div/rem should have been handled earlier."); |
1827 return; | 2183 return; |
1828 case InstArithmetic::Fadd: | 2184 case InstArithmetic::Fadd: |
1829 case InstArithmetic::Fsub: | 2185 case InstArithmetic::Fsub: |
1830 case InstArithmetic::Fmul: | 2186 case InstArithmetic::Fmul: |
1831 case InstArithmetic::Fdiv: | 2187 case InstArithmetic::Fdiv: |
1832 case InstArithmetic::Frem: | 2188 case InstArithmetic::Frem: |
1833 llvm_unreachable("Floating point arith should have been handled earlier."); | 2189 llvm::report_fatal_error( |
| 2190 "Floating point arith should have been handled earlier."); |
1834 return; | 2191 return; |
1835 } | 2192 } |
1836 } | 2193 } |
1837 | 2194 |
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { | 2195 void TargetARM32::lowerAssign(const InstAssign *Inst) { |
1839 Variable *Dest = Inst->getDest(); | 2196 Variable *Dest = Inst->getDest(); |
1840 Operand *Src0 = Inst->getSrc(0); | 2197 Operand *Src0 = Inst->getSrc(0); |
1841 assert(Dest->getType() == Src0->getType()); | 2198 assert(Dest->getType() == Src0->getType()); |
1842 if (Dest->getType() == IceType_i64) { | 2199 if (Dest->getType() == IceType_i64) { |
1843 Src0 = legalizeUndef(Src0); | 2200 Src0 = legalizeUndef(Src0); |
| 2201 |
| 2202 Variable *T_Lo = makeReg(IceType_i32); |
| 2203 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2204 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1848 Variable *T_Lo = makeReg(IceType_i32); | |
1849 Variable *T_Hi = makeReg(IceType_i32); | |
1850 | |
1851 _mov(T_Lo, Src0Lo); | 2205 _mov(T_Lo, Src0Lo); |
1852 _mov(DestLo, T_Lo); | 2206 _mov(DestLo, T_Lo); |
| 2207 |
| 2208 Variable *T_Hi = makeReg(IceType_i32); |
| 2209 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2210 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
1853 _mov(T_Hi, Src0Hi); | 2211 _mov(T_Hi, Src0Hi); |
1854 _mov(DestHi, T_Hi); | 2212 _mov(DestHi, T_Hi); |
| 2213 |
| 2214 return; |
| 2215 } |
| 2216 |
| 2217 Operand *NewSrc; |
| 2218 if (Dest->hasReg()) { |
| 2219 // If Dest already has a physical register, then legalize the Src operand |
| 2220 // into a Variable with the same register assignment. This especially |
| 2221 // helps allow the use of Flex operands. |
| 2222 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
1855 } else { | 2223 } else { |
1856 Operand *NewSrc; | 2224 // Dest could be a stack operand. Since we could potentially need to do a |
1857 if (Dest->hasReg()) { | 2225 // Store (and store can only have Register operands), legalize this to a |
1858 // If Dest already has a physical register, then legalize the Src operand | 2226 // register. |
1859 // into a Variable with the same register assignment. This especially | 2227 NewSrc = legalize(Src0, Legal_Reg); |
1860 // helps allow the use of Flex operands. | |
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
1862 } else { | |
1863 // Dest could be a stack operand. Since we could potentially need to do a | |
1864 // Store (and store can only have Register operands), legalize this to a | |
1865 // register. | |
1866 NewSrc = legalize(Src0, Legal_Reg); | |
1867 } | |
1868 if (isVectorType(Dest->getType())) { | |
1869 Variable *SrcR = legalizeToReg(NewSrc); | |
1870 _mov(Dest, SrcR); | |
1871 } else if (isFloatingType(Dest->getType())) { | |
1872 Variable *SrcR = legalizeToReg(NewSrc); | |
1873 _mov(Dest, SrcR); | |
1874 } else { | |
1875 _mov(Dest, NewSrc); | |
1876 } | |
1877 } | 2228 } |
| 2229 |
| 2230 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { |
| 2231 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); |
| 2232 } |
| 2233 _mov(Dest, NewSrc); |
1878 } | 2234 } |
1879 | 2235 |
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 2236 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 2237 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 2238 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
1883 InstARM32Label *NewShortCircuitLabel = nullptr; | 2239 InstARM32Label *NewShortCircuitLabel = nullptr; |
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 2240 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
1885 | 2241 |
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 2242 const Inst *Producer = BoolComputations.getProducerOf(Boolean); |
1887 | 2243 |
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2573 struct { | 2929 struct { |
2574 CondARM32::Cond CC0; | 2930 CondARM32::Cond CC0; |
2575 CondARM32::Cond CC1; | 2931 CondARM32::Cond CC1; |
2576 } TableFcmp[] = { | 2932 } TableFcmp[] = { |
2577 #define X(val, CC0, CC1) \ | 2933 #define X(val, CC0, CC1) \ |
2578 { CondARM32::CC0, CondARM32::CC1 } \ | 2934 { CondARM32::CC0, CondARM32::CC1 } \ |
2579 , | 2935 , |
2580 FCMPARM32_TABLE | 2936 FCMPARM32_TABLE |
2581 #undef X | 2937 #undef X |
2582 }; | 2938 }; |
| 2939 |
| 2940 bool isFloatingPointZero(Operand *Src) { |
| 2941 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { |
| 2942 return Utils::isPositiveZero(F32->getValue()); |
| 2943 } |
| 2944 |
| 2945 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { |
| 2946 return Utils::isPositiveZero(F64->getValue()); |
| 2947 } |
| 2948 |
| 2949 return false; |
| 2950 } |
2583 } // end of anonymous namespace | 2951 } // end of anonymous namespace |
2584 | 2952 |
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 2953 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
2586 InstFcmp::FCond Condition = Instr->getCondition(); | 2954 InstFcmp::FCond Condition = Instr->getCondition(); |
2587 switch (Condition) { | 2955 switch (Condition) { |
2588 case InstFcmp::False: | 2956 case InstFcmp::False: |
2589 return CondWhenTrue(CondARM32::kNone); | 2957 return CondWhenTrue(CondARM32::kNone); |
2590 case InstFcmp::True: | 2958 case InstFcmp::True: |
2591 return CondWhenTrue(CondARM32::AL); | 2959 return CondWhenTrue(CondARM32::AL); |
2592 break; | 2960 break; |
2593 default: { | 2961 default: { |
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | 2962 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); | 2963 Operand *Src1 = Instr->getSrc(1); |
2596 _vcmp(Src0R, Src1R); | 2964 if (isFloatingPointZero(Src1)) { |
| 2965 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); |
| 2966 } else { |
| 2967 _vcmp(Src0R, legalizeToReg(Src1)); |
| 2968 } |
2597 _vmrs(); | 2969 _vmrs(); |
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2970 assert(Condition < llvm::array_lengthof(TableFcmp)); |
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); | 2971 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
2600 } | 2972 } |
2601 } | 2973 } |
2602 } | 2974 } |
2603 | 2975 |
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | 2976 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
2605 Variable *Dest = Instr->getDest(); | 2977 Variable *Dest = Instr->getDest(); |
2606 if (isVectorType(Dest->getType())) { | 2978 if (isVectorType(Dest->getType())) { |
(...skipping 28 matching lines...) Expand all Loading... |
2635 _mov(T, _1, Cond.WhenTrue0); | 3007 _mov(T, _1, Cond.WhenTrue0); |
2636 } | 3008 } |
2637 | 3009 |
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { | 3010 if (Cond.WhenTrue1 != CondARM32::kNone) { |
2639 _mov_redefined(T, _1, Cond.WhenTrue1); | 3011 _mov_redefined(T, _1, Cond.WhenTrue1); |
2640 } | 3012 } |
2641 | 3013 |
2642 _mov(Dest, T); | 3014 _mov(Dest, T); |
2643 } | 3015 } |
2644 | 3016 |
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { | 3017 TargetARM32::CondWhenTrue |
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); | 3018 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); | 3019 Operand *Src1) { |
| 3020 size_t Index = static_cast<size_t>(Condition); |
| 3021 assert(Index < llvm::array_lengthof(TableIcmp64)); |
2648 | 3022 |
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 3023 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); |
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 3024 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); |
| 3025 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); |
| 3026 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); |
| 3027 |
| 3028 if (SrcsLo.hasConstOperand()) { |
| 3029 const uint32_t ValueLo = SrcsLo.getConstantValue(); |
| 3030 const uint32_t ValueHi = SrcsHi.getConstantValue(); |
| 3031 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo; |
| 3032 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && |
| 3033 Value == 0) { |
| 3034 Variable *T = makeReg(IceType_i32); |
| 3035 Variable *Src0LoR = SrcsLo.src0R(this); |
| 3036 Variable *Src0HiR = SrcsHi.src0R(this); |
| 3037 _orrs(T, Src0LoR, Src0HiR); |
| 3038 Context.insert(InstFakeUse::create(Func, T)); |
| 3039 return CondWhenTrue(TableIcmp64[Index].C1); |
| 3040 } |
| 3041 |
| 3042 Variable *Src0RLo = SrcsLo.src0R(this); |
| 3043 Variable *Src0RHi = SrcsHi.src0R(this); |
| 3044 Operand *Src1RFLo = SrcsLo.src1RF(this); |
| 3045 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this); |
| 3046 |
| 3047 const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands(); |
| 3048 |
| 3049 if (UseRsb) { |
| 3050 if (TableIcmp64[Index].IsSigned) { |
| 3051 Variable *T = makeReg(IceType_i32); |
| 3052 _rsbs(T, Src0RLo, Src1RFLo); |
| 3053 Context.insert(InstFakeUse::create(Func, T)); |
| 3054 |
| 3055 T = makeReg(IceType_i32); |
| 3056 _rscs(T, Src0RHi, Src1RFHi); |
| 3057 // We need to add a FakeUse here because liveness gets mad at us (Def |
| 3058 // without Use.) Note that flag-setting instructions are considered to |
| 3059 // have side effects and, therefore, are not DCE'ed. |
| 3060 Context.insert(InstFakeUse::create(Func, T)); |
| 3061 } else { |
| 3062 Variable *T = makeReg(IceType_i32); |
| 3063 _rsbs(T, Src0RHi, Src1RFHi); |
| 3064 Context.insert(InstFakeUse::create(Func, T)); |
| 3065 |
| 3066 T = makeReg(IceType_i32); |
| 3067 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); |
| 3068 Context.insert(InstFakeUse::create(Func, T)); |
| 3069 } |
| 3070 } else { |
| 3071 if (TableIcmp64[Index].IsSigned) { |
| 3072 _cmp(Src0RLo, Src1RFLo); |
| 3073 Variable *T = makeReg(IceType_i32); |
| 3074 _sbcs(T, Src0RHi, Src1RFHi); |
| 3075 Context.insert(InstFakeUse::create(Func, T)); |
| 3076 } else { |
| 3077 _cmp(Src0RHi, Src1RFHi); |
| 3078 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
| 3079 } |
| 3080 } |
| 3081 |
| 3082 return CondWhenTrue(TableIcmp64[Index].C1); |
| 3083 } |
| 3084 |
| 3085 Variable *Src0RLo, *Src0RHi; |
| 3086 Operand *Src1RFLo, *Src1RFHi; |
| 3087 if (TableIcmp64[Index].Swapped) { |
| 3088 Src0RLo = legalizeToReg(loOperand(Src1)); |
| 3089 Src0RHi = legalizeToReg(hiOperand(Src1)); |
| 3090 Src1RFLo = legalizeToReg(loOperand(Src0)); |
| 3091 Src1RFHi = legalizeToReg(hiOperand(Src0)); |
| 3092 } else { |
| 3093 Src0RLo = legalizeToReg(loOperand(Src0)); |
| 3094 Src0RHi = legalizeToReg(hiOperand(Src0)); |
| 3095 Src1RFLo = legalizeToReg(loOperand(Src1)); |
| 3096 Src1RFHi = legalizeToReg(hiOperand(Src1)); |
| 3097 } |
2651 | 3098 |
2652 // a=icmp cond, b, c ==> | 3099 // a=icmp cond, b, c ==> |
2653 // GCC does: | 3100 // GCC does: |
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo | 3101 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 3102 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2656 // mov.<C1> t, #1 mov.<C1> t, #1 | 3103 // mov.<C1> t, #1 mov.<C1> t, #1 |
2657 // mov.<C2> t, #0 mov.<C2> t, #0 | 3104 // mov.<C2> t, #0 mov.<C2> t, #0 |
2658 // mov a, t mov a, t | 3105 // mov a, t mov a, t |
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 3106 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2660 // is used for signed compares. In some cases, b and c need to be swapped as | 3107 // is used for signed compares. In some cases, b and c need to be swapped as |
(...skipping 10 matching lines...) Expand all Loading... |
2671 // that's nice in that it's just as short but has fewer dependencies for | 3118 // that's nice in that it's just as short but has fewer dependencies for |
2672 // better ILP at the cost of more registers. | 3119 // better ILP at the cost of more registers. |
2673 // | 3120 // |
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 3121 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one | 3122 // unconditional mov #0, two cmps, two conditional mov #1, and one |
2676 // conditional reg mov. That has few dependencies for good ILP, but is a | 3123 // conditional reg mov. That has few dependencies for good ILP, but is a |
2677 // longer sequence. | 3124 // longer sequence. |
2678 // | 3125 // |
2679 // So, we are going with the GCC version since it's usually better (except | 3126 // So, we are going with the GCC version since it's usually better (except |
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 3127 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
| 3128 if (TableIcmp64[Index].IsSigned) { |
| 3129 Variable *ScratchReg = makeReg(IceType_i32); |
| 3130 _cmp(Src0RLo, Src1RFLo); |
| 3131 _sbcs(ScratchReg, Src0RHi, Src1RFHi); |
| 3132 // ScratchReg isn't going to be used, but we need the side-effect of |
| 3133 // setting flags from this operation. |
| 3134 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| 3135 } else { |
| 3136 _cmp(Src0RHi, Src1RFHi); |
| 3137 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
| 3138 } |
| 3139 return CondWhenTrue(TableIcmp64[Index].C1); |
| 3140 } |
2681 | 3141 |
2682 if (Src0->getType() == IceType_i64) { | 3142 TargetARM32::CondWhenTrue |
2683 InstIcmp::ICond Conditon = Inst->getCondition(); | 3143 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
2684 size_t Index = static_cast<size_t>(Conditon); | 3144 Operand *Src1) { |
2685 assert(Index < llvm::array_lengthof(TableIcmp64)); | 3145 Int32Operands Srcs(Src0, Src1); |
2686 Variable *Src0Lo, *Src0Hi; | 3146 if (!Srcs.hasConstOperand()) { |
2687 Operand *Src1LoRF, *Src1HiRF; | 3147 |
2688 if (TableIcmp64[Index].Swapped) { | 3148 Variable *Src0R = Srcs.src0R(this); |
2689 Src0Lo = legalizeToReg(loOperand(Src1)); | 3149 Operand *Src1RF = Srcs.src1RF(this); |
2690 Src0Hi = legalizeToReg(hiOperand(Src1)); | 3150 _cmp(Src0R, Src1RF); |
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 3151 return CondWhenTrue(getIcmp32Mapping(Condition)); |
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
2693 } else { | |
2694 Src0Lo = legalizeToReg(loOperand(Src0)); | |
2695 Src0Hi = legalizeToReg(hiOperand(Src0)); | |
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
2698 } | |
2699 if (TableIcmp64[Index].IsSigned) { | |
2700 Variable *ScratchReg = makeReg(IceType_i32); | |
2701 _cmp(Src0Lo, Src1LoRF); | |
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | |
2703 // ScratchReg isn't going to be used, but we need the side-effect of | |
2704 // setting flags from this operation. | |
2705 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
2706 } else { | |
2707 _cmp(Src0Hi, Src1HiRF); | |
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | |
2709 } | |
2710 return CondWhenTrue(TableIcmp64[Index].C1); | |
2711 } | 3152 } |
2712 | 3153 |
| 3154 Variable *Src0R = Srcs.src0R(this); |
| 3155 const int32_t Value = Srcs.getConstantValue(); |
| 3156 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { |
| 3157 _tst(Src0R, Src0R); |
| 3158 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3159 } |
| 3160 |
| 3161 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() && |
| 3162 Srcs.negatedImmediateIsFlexEncodable()) { |
| 3163 Operand *Src1F = Srcs.negatedSrc1F(this); |
| 3164 _cmn(Src0R, Src1F); |
| 3165 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3166 } |
| 3167 |
| 3168 Operand *Src1RF = Srcs.src1RF(this); |
| 3169 if (!Srcs.swappedOperands()) { |
| 3170 _cmp(Src0R, Src1RF); |
| 3171 } else { |
| 3172 Variable *T = makeReg(IceType_i32); |
| 3173 _rsbs(T, Src0R, Src1RF); |
| 3174 Context.insert(InstFakeUse::create(Func, T)); |
| 3175 } |
| 3176 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3177 } |
| 3178 |
| 3179 TargetARM32::CondWhenTrue |
| 3180 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| 3181 Operand *Src1) { |
| 3182 Int32Operands Srcs(Src0, Src1); |
| 3183 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
| 3184 assert(ShAmt >= 0); |
| 3185 |
| 3186 if (!Srcs.hasConstOperand()) { |
| 3187 Variable *Src0R = makeReg(IceType_i32); |
| 3188 Operand *ShAmtF = |
| 3189 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); |
| 3190 _lsl(Src0R, legalizeToReg(Src0), ShAmtF); |
| 3191 |
| 3192 Variable *Src1R = legalizeToReg(Src1); |
| 3193 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( |
| 3194 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); |
| 3195 _cmp(Src0R, Src1F); |
| 3196 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3197 } |
| 3198 |
| 3199 const int32_t Value = Srcs.getConstantValue(); |
| 3200 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { |
| 3201 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); |
| 3202 Variable *T = makeReg(IceType_i32); |
| 3203 _lsls(T, Srcs.src0R(this), ShAmtOp); |
| 3204 Context.insert(InstFakeUse::create(Func, T)); |
| 3205 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3206 } |
| 3207 |
| 3208 Variable *ConstR = makeReg(IceType_i32); |
| 3209 _mov(ConstR, |
| 3210 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex)); |
| 3211 Operand *NonConstF = OperandARM32FlexReg::create( |
| 3212 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL, |
| 3213 Ctx->getConstantInt32(ShAmt)); |
| 3214 |
| 3215 if (Srcs.swappedOperands()) { |
| 3216 _cmp(ConstR, NonConstF); |
| 3217 } else { |
| 3218 Variable *T = makeReg(IceType_i32); |
| 3219 _rsbs(T, ConstR, NonConstF); |
| 3220 Context.insert(InstFakeUse::create(Func, T)); |
| 3221 } |
| 3222 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 3223 } |
| 3224 |
| 3225 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| 3226 assert(Inst->getSrc(0)->getType() != IceType_i1); |
| 3227 assert(Inst->getSrc(1)->getType() != IceType_i1); |
| 3228 |
| 3229 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 3230 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| 3231 |
| 3232 const InstIcmp::ICond Condition = Inst->getCondition(); |
2713 // a=icmp cond b, c ==> | 3233 // a=icmp cond b, c ==> |
2714 // GCC does: | 3234 // GCC does: |
2715 // <u/s>xtb tb, b | 3235 // <u/s>xtb tb, b |
2716 // <u/s>xtb tc, c | 3236 // <u/s>xtb tc, c |
2717 // cmp tb, tc | 3237 // cmp tb, tc |
2718 // mov.C1 t, #0 | 3238 // mov.C1 t, #0 |
2719 // mov.C2 t, #1 | 3239 // mov.C2 t, #1 |
2720 // mov a, t | 3240 // mov a, t |
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have | 3241 // where the unsigned/sign extension is not needed for 32-bit. They also have |
2722 // special cases for EQ and NE. E.g., for NE: | 3242 // special cases for EQ and NE. E.g., for NE: |
2723 // <extend to tb, tc> | 3243 // <extend to tb, tc> |
2724 // subs t, tb, tc | 3244 // subs t, tb, tc |
2725 // movne t, #1 | 3245 // movne t, #1 |
2726 // mov a, t | 3246 // mov a, t |
2727 // | 3247 // |
2728 // LLVM does: | 3248 // LLVM does: |
2729 // lsl tb, b, #<N> | 3249 // lsl tb, b, #<N> |
2730 // mov t, #0 | 3250 // mov t, #0 |
2731 // cmp tb, c, lsl #<N> | 3251 // cmp tb, c, lsl #<N> |
2732 // mov.<C> t, #1 | 3252 // mov.<C> t, #1 |
2733 // mov a, t | 3253 // mov a, t |
2734 // | 3254 // |
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on | 3255 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 3256 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 3257 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2738 // first. It's not clear to me why that special-casing is needed. | 3258 // first. It's not clear to me why that special-casing is needed. |
2739 // | 3259 // |
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few | 3260 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2741 // dependencies. | 3261 // dependencies. |
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 3262 switch (Src0->getType()) { |
2743 assert(ShiftAmt >= 0); | 3263 default: |
2744 Constant *ShiftConst = nullptr; | 3264 llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); |
2745 Variable *Src0R = nullptr; | 3265 case IceType_i8: |
2746 if (ShiftAmt) { | 3266 case IceType_i16: |
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 3267 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); |
2748 Src0R = makeReg(IceType_i32); | 3268 case IceType_i32: |
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 3269 return lowerInt32IcmpCond(Condition, Src0, Src1); |
2750 } else { | 3270 case IceType_i64: |
2751 Src0R = legalizeToReg(Src0); | 3271 return lowerInt64IcmpCond(Condition, Src0, Src1); |
2752 } | 3272 } |
2753 if (ShiftAmt) { | |
2754 Variable *Src1R = legalizeToReg(Src1); | |
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | |
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | |
2757 _cmp(Src0R, Src1RShifted); | |
2758 } else { | |
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | |
2760 _cmp(Src0R, Src1RF); | |
2761 } | |
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); | |
2763 } | 3273 } |
2764 | 3274 |
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 3275 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
2766 Variable *Dest = Inst->getDest(); | 3276 Variable *Dest = Inst->getDest(); |
2767 | 3277 |
2768 if (isVectorType(Dest->getType())) { | 3278 if (isVectorType(Dest->getType())) { |
2769 Variable *T = makeReg(Dest->getType()); | 3279 Variable *T = makeReg(Dest->getType()); |
2770 Context.insert(InstFakeDef::create(Func, T)); | 3280 Context.insert(InstFakeDef::create(Func, T)); |
2771 _mov(Dest, T); | 3281 _mov(Dest, T); |
2772 UnimplementedError(Func->getContext()->getFlags()); | 3282 UnimplementedError(Func->getContext()->getFlags()); |
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4247 } | 4757 } |
4248 return Reg; | 4758 return Reg; |
4249 } | 4759 } |
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 4760 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
4251 Variable *Reg = makeReg(Ty, RegNum); | 4761 Variable *Reg = makeReg(Ty, RegNum); |
4252 _movw(Reg, C); | 4762 _movw(Reg, C); |
4253 _movt(Reg, C); | 4763 _movt(Reg, C); |
4254 return Reg; | 4764 return Reg; |
4255 } else { | 4765 } else { |
4256 assert(isScalarFloatingType(Ty)); | 4766 assert(isScalarFloatingType(Ty)); |
| 4767 uint32_t ModifiedImm; |
| 4768 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
| 4769 Variable *T = makeReg(Ty, RegNum); |
| 4770 _mov(T, |
| 4771 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
| 4772 return T; |
| 4773 } |
| 4774 |
| 4775 if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
| 4776 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
| 4777 // because ARM does not have a veor instruction with S registers. |
| 4778 Variable *T = makeReg(IceType_f64, RegNum); |
| 4779 Context.insert(InstFakeDef::create(Func, T)); |
| 4780 _veor(T, T, T); |
| 4781 return T; |
| 4782 } |
| 4783 |
4257 // Load floats/doubles from literal pool. | 4784 // Load floats/doubles from literal pool. |
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an | |
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified | |
4260 // immediate constants". Or, for 32-bit floating point numbers, just | |
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG | |
4262 // instead of using a movw/movt pair to get the const-pool address then | |
4263 // loading to SREG. | |
4264 std::string Buffer; | 4785 std::string Buffer; |
4265 llvm::raw_string_ostream StrBuf(Buffer); | 4786 llvm::raw_string_ostream StrBuf(Buffer); |
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 4787 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); | 4788 llvm::cast<Constant>(From)->setShouldBePooled(true); |
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 4789 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
4269 Variable *BaseReg = makeReg(getPointerType()); | 4790 Variable *BaseReg = makeReg(getPointerType()); |
4270 _movw(BaseReg, Offset); | 4791 _movw(BaseReg, Offset); |
4271 _movt(BaseReg, Offset); | 4792 _movt(BaseReg, Offset); |
4272 From = formMemoryOperand(BaseReg, Ty); | 4793 From = formMemoryOperand(BaseReg, Ty); |
4273 return copyToReg(From, RegNum); | 4794 return copyToReg(From, RegNum); |
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 5420 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
4900 // However, for compatibility with current NaCl LLVM, don't claim that. | 5421 // However, for compatibility with current NaCl LLVM, don't claim that. |
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 5422 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
4902 } | 5423 } |
4903 | 5424 |
// Out-of-line storage for TargetARM32's static register tables — presumably
// populated during one-time target initialization (the initializer is not
// visible in this chunk; TODO confirm against staticInit).
llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 | 5428 |
4908 } // end of namespace Ice | 5429 } // end of namespace Ice |
OLD | NEW |