Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1290 } | 1290 } |
| 1291 _mov(Dest, SP); | 1291 _mov(Dest, SP); |
| 1292 } | 1292 } |
| 1293 | 1293 |
| 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { | 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
| 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) | 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) |
| 1296 return; | 1296 return; |
| 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); | 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); |
| 1298 switch (Ty) { | 1298 switch (Ty) { |
| 1299 default: | 1299 default: |
| 1300 llvm_unreachable("Unexpected type"); | 1300 llvm::report_fatal_error("Unexpected type"); |
| 1301 case IceType_i8: { | 1301 case IceType_i8: |
| 1302 Operand *Mask = | |
| 1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); | |
| 1304 _tst(SrcLoReg, Mask); | |
| 1305 break; | |
| 1306 } | |
| 1307 case IceType_i16: { | 1302 case IceType_i16: { |
| 1308 Operand *Mask = | 1303 Operand *ShAmtF = |
| 1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); | 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
| 1310 _tst(SrcLoReg, Mask); | 1305 Legal_Reg | Legal_Flex); |
| 1311 break; | 1306 Variable *T = makeReg(IceType_i32); |
| 1312 } | 1307 _lsls(T, SrcLoReg, ShAmtF); |
| 1308 Context.insert(InstFakeUse::create(Func, T)); | |
| 1309 } break; | |
| 1313 case IceType_i32: { | 1310 case IceType_i32: { |
| 1314 _tst(SrcLoReg, SrcLoReg); | 1311 _tst(SrcLoReg, SrcLoReg); |
| 1315 break; | 1312 break; |
| 1316 } | 1313 } |
| 1317 case IceType_i64: { | 1314 case IceType_i64: { |
| 1318 Variable *ScratchReg = makeReg(IceType_i32); | 1315 Variable *T = makeReg(IceType_i32); |
| 1319 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
| 1320 // ScratchReg isn't going to be used, but we need the side-effect of | 1317 // T isn't going to be used, but we need the side-effect of setting flags |
| 1321 // setting flags from this operation. | 1318 // from this operation. |
| 1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1319 Context.insert(InstFakeUse::create(Func, T)); |
| 1323 } | 1320 } |
| 1324 } | 1321 } |
| 1325 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1322 InstARM32Label *Label = InstARM32Label::create(Func, this); |
| 1326 _br(Label, CondARM32::NE); | 1323 _br(Label, CondARM32::NE); |
| 1327 _trap(); | 1324 _trap(); |
| 1328 Context.insert(Label); | 1325 Context.insert(Label); |
| 1329 } | 1326 } |
| 1330 | 1327 |
| 1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
| 1332 Operand *Src1, ExtInstr ExtFunc, | 1329 Operand *Src1, ExtInstr ExtFunc, |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1397 _orr(T, Src0, Src1RF); | 1394 _orr(T, Src0, Src1RF); |
| 1398 break; | 1395 break; |
| 1399 case InstArithmetic::Xor: | 1396 case InstArithmetic::Xor: |
| 1400 _eor(T, Src0, Src1RF); | 1397 _eor(T, Src0, Src1RF); |
| 1401 break; | 1398 break; |
| 1402 } | 1399 } |
| 1403 _mov(Dest, T); | 1400 _mov(Dest, T); |
| 1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; | 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
| 1405 } | 1402 } |
| 1406 | 1403 |
| 1404 namespace { | |
| 1405 // NumericOperands is used during arithmetic/icmp lowering for constant folding. | |
| 1406 // It holds the operation's two sources, and maintains some state as to whether | |
|
Jim Stichnoth
2015/11/16 23:06:26
operations'
John
2015/11/17 22:17:05
well, this is
the two sources of the operation
n
| |
| 1407 // one of them is a constant. If one of the operands is a constant, then it will | |
| 1408 // be stored as the operation's second source, with a bit indicating whether the | |
| 1409 // operands were swapped. | |
| 1410 // | |
| 1411 // The class is split into a base class with operand type-independent methods, and | |
|
Jim Stichnoth
2015/11/16 23:06:26
split into ?
John
2015/11/17 22:17:05
Done.
| |
| 1412 // a derived, templated class, for each type of operand we want to fold | |
| 1413 // constants for: | |
| 1414 // | |
| 1415 // NumericOperandsBase --> NumericOperands<ConstantFloat> | |
| 1416 // --> NumericOperands<ConstantDouble> | |
| 1417 // --> NumericOperands<ConstantInt32> | |
| 1418 // | |
| 1419 // NumericOperands<ConstantInt32> also exposes helper methods for emitting | |
|
Jim Stichnoth
2015/11/16 23:06:26
emitting
John
2015/11/17 22:17:05
Done.
| |
| 1420 // inverted/negated immediates. | |
| 1421 class NumericOperandsBase { | |
| 1422 NumericOperandsBase() = delete; | |
| 1423 NumericOperandsBase(const NumericOperandsBase &) = delete; | |
| 1424 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete; | |
| 1425 | |
| 1426 public: | |
| 1427 NumericOperandsBase(Operand *S0, Operand *S1) | |
| 1428 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)), | |
| 1429 Swapped(Src0 == S1 && S0 != S1) { | |
| 1430 assert(Src0 != nullptr); | |
| 1431 assert(Src1 != nullptr); | |
| 1432 assert(Src0 != Src1 || S0 == S1); | |
| 1433 } | |
| 1434 | |
| 1435 bool hasConstOperand() const { | |
| 1436 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1); | |
| 1437 } | |
| 1438 | |
| 1439 bool swappedOperands() const { return Swapped; } | |
| 1440 | |
| 1441 Variable *src0R(TargetARM32 *Target) const { | |
| 1442 return legalizeToReg(Target, Src0); | |
| 1443 } | |
| 1444 | |
| 1445 Variable *unswappedSrc0R(TargetARM32 *Target) const { | |
| 1446 return legalizeToReg(Target, Swapped ? Src1 : Src0); | |
| 1447 } | |
| 1448 | |
| 1449 Operand *src1RF(TargetARM32 *Target) const { | |
| 1450 return legalizeToRegOrFlex(Target, Src1); | |
| 1451 } | |
| 1452 | |
| 1453 Variable *unswappedSrc1R(TargetARM32 *Target) const { | |
| 1454 return legalizeToReg(Target, Swapped ? Src0 : Src1); | |
| 1455 } | |
| 1456 | |
| 1457 Operand *unswappedSrc1RF(TargetARM32 *Target) const { | |
| 1458 return legalizeToRegOrFlex(Target, Swapped ? Src0 : Src1); | |
| 1459 } | |
| 1460 | |
| 1461 protected: | |
| 1462 Operand *const Src0; | |
| 1463 Operand *const Src1; | |
| 1464 const bool Swapped; | |
| 1465 | |
| 1466 static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) { | |
| 1467 return Target->legalizeToReg(Src); | |
| 1468 } | |
| 1469 | |
| 1470 static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) { | |
| 1471 return Target->legalize(Src, | |
| 1472 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex); | |
| 1473 } | |
| 1474 | |
| 1475 private: | |
| 1476 static Operand *NonConstOperand(Operand *S0, Operand *S1) { | |
| 1477 if (!llvm::isa<Constant>(S0)) | |
| 1478 return S0; | |
| 1479 if (!llvm::isa<Constant>(S1)) | |
| 1480 return S1; | |
| 1481 if (llvm::isa<ConstantRelocatable>(S1) && | |
| 1482 !llvm::isa<ConstantRelocatable>(S0)) | |
| 1483 return S1; | |
| 1484 return S0; | |
| 1485 } | |
| 1486 | |
| 1487 static Operand *ConstOperand(Operand *S0, Operand *S1) { | |
| 1488 if (!llvm::isa<Constant>(S0)) | |
| 1489 return S1; | |
| 1490 if (!llvm::isa<Constant>(S1)) | |
| 1491 return S0; | |
| 1492 if (llvm::isa<ConstantRelocatable>(S1) && | |
| 1493 !llvm::isa<ConstantRelocatable>(S0)) | |
| 1494 return S0; | |
| 1495 return S1; | |
| 1496 } | |
| 1497 }; | |
| 1498 | |
| 1499 template <typename C> class NumericOperands : public NumericOperandsBase { | |
| 1500 NumericOperands() = delete; | |
| 1501 NumericOperands(const NumericOperands &) = delete; | |
| 1502 NumericOperands &operator=(const NumericOperands &) = delete; | |
| 1503 | |
| 1504 public: | |
| 1505 NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) { | |
| 1506 assert(!hasConstOperand() || llvm::isa<C>(this->Src1)); | |
| 1507 } | |
| 1508 | |
| 1509 typename C::PrimType getConstantValue() const { | |
| 1510 return llvm::cast<C>(Src1)->getValue(); | |
| 1511 } | |
| 1512 }; | |
| 1513 | |
| 1514 using FloatOperands = NumericOperands<ConstantFloat>; | |
| 1515 using DoubleOperands = NumericOperands<ConstantDouble>; | |
| 1516 | |
| 1517 class Int32Operands : public NumericOperands<ConstantInteger32> { | |
| 1518 Int32Operands() = delete; | |
| 1519 Int32Operands(const Int32Operands &) = delete; | |
| 1520 Int32Operands &operator=(const Int32Operands &) = delete; | |
| 1521 | |
| 1522 public: | |
| 1523 Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {} | |
| 1524 | |
| 1525 bool immediateIsFlexEncodable() const { | |
| 1526 uint32_t Rotate, Imm8; | |
| 1527 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8); | |
| 1528 } | |
| 1529 | |
| 1530 bool negatedImmediateIsFlexEncodable() const { | |
| 1531 uint32_t Rotate, Imm8; | |
| 1532 return OperandARM32FlexImm::canHoldImm( | |
| 1533 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8); | |
| 1534 } | |
| 1535 | |
| 1536 Operand *negatedSrc1F(TargetARM32 *Target) const { | |
| 1537 return legalizeToRegOrFlex(Target, | |
| 1538 Target->getCtx()->getConstantInt32( | |
| 1539 -static_cast<int32_t>(getConstantValue()))); | |
| 1540 } | |
| 1541 | |
| 1542 bool invertedImmediateIsFlexEncodable() const { | |
| 1543 uint32_t Rotate, Imm8; | |
| 1544 return OperandARM32FlexImm::canHoldImm( | |
| 1545 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8); | |
| 1546 } | |
| 1547 | |
| 1548 Operand *invertedSrc1F(TargetARM32 *Target) const { | |
| 1549 return legalizeToRegOrFlex(Target, | |
| 1550 Target->getCtx()->getConstantInt32( | |
| 1551 ~static_cast<uint32_t>(getConstantValue()))); | |
| 1552 } | |
| 1553 }; | |
| 1554 } // end of anonymous namespace | |
| 1555 | |
| 1556 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, | |
| 1557 Variable *Dest, Operand *Src0, | |
| 1558 Operand *Src1) { | |
| 1559 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); | |
| 1560 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); | |
| 1561 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); | |
| 1562 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); | |
| 1563 | |
| 1564 // These helper-call-involved instructions are lowered in this separate | |
| 1565 // switch. This is because we would otherwise assume that we need to | |
| 1566 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | |
| 1567 // helper calls, and such unused/redundant instructions will fail liveness | |
| 1568 // analysis under -Om1 setting. | |
| 1569 switch (Op) { | |
| 1570 default: | |
| 1571 break; | |
| 1572 case InstArithmetic::Udiv: | |
| 1573 case InstArithmetic::Sdiv: | |
| 1574 case InstArithmetic::Urem: | |
| 1575 case InstArithmetic::Srem: { | |
| 1576 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
| 1577 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
| 1578 // register, which will hide a constant source operand. Instead, check | |
| 1579 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
| 1580 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { | |
| 1581 if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) { | |
| 1582 _trap(); | |
| 1583 return; | |
| 1584 } | |
| 1585 } else { | |
| 1586 Operand *Src1Lo = SrcsLo.unswappedSrc1R(this); | |
| 1587 Operand *Src1Hi = SrcsHi.unswappedSrc1R(this); | |
| 1588 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
| 1589 } | |
| 1590 // Technically, ARM has its own aeabi routines, but we can use the | |
| 1591 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
| 1592 // the more standard __moddi3 for rem. | |
| 1593 const char *HelperName = ""; | |
| 1594 switch (Op) { | |
| 1595 default: | |
| 1596 llvm::report_fatal_error("Should have only matched div ops."); | |
| 1597 break; | |
| 1598 case InstArithmetic::Udiv: | |
| 1599 HelperName = H_udiv_i64; | |
| 1600 break; | |
| 1601 case InstArithmetic::Sdiv: | |
| 1602 HelperName = H_sdiv_i64; | |
| 1603 break; | |
| 1604 case InstArithmetic::Urem: | |
| 1605 HelperName = H_urem_i64; | |
| 1606 break; | |
| 1607 case InstArithmetic::Srem: | |
| 1608 HelperName = H_srem_i64; | |
| 1609 break; | |
| 1610 } | |
| 1611 constexpr SizeT MaxSrcs = 2; | |
| 1612 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 1613 Call->addArg(Src0); | |
| 1614 Call->addArg(Src1); | |
| 1615 lowerCall(Call); | |
| 1616 return; | |
| 1617 } | |
| 1618 } | |
| 1619 | |
| 1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1622 Variable *T_Lo = makeReg(DestLo->getType()); | |
| 1623 Variable *T_Hi = makeReg(DestHi->getType()); | |
| 1624 | |
| 1625 switch (Op) { | |
| 1626 case InstArithmetic::_num: | |
| 1627 llvm::report_fatal_error("Unknown arithmetic operator"); | |
| 1628 return; | |
| 1629 case InstArithmetic::Add: | |
| 1630 _adds(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
|
Jim Stichnoth
2015/11/16 23:06:26
Don't do this. src0R() and src1RF() have side eff
John
2015/11/17 22:17:05
doh... done.
| |
| 1631 _mov(DestLo, T_Lo); | |
| 1632 _adc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1633 _mov(DestHi, T_Hi); | |
| 1634 return; | |
| 1635 case InstArithmetic::And: | |
| 1636 _and(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
| 1637 _mov(DestLo, T_Lo); | |
| 1638 _and(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1639 _mov(DestHi, T_Hi); | |
| 1640 return; | |
| 1641 case InstArithmetic::Or: | |
| 1642 _orr(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
| 1643 _mov(DestLo, T_Lo); | |
| 1644 _orr(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1645 _mov(DestHi, T_Hi); | |
| 1646 return; | |
| 1647 case InstArithmetic::Xor: | |
| 1648 _eor(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
| 1649 _mov(DestLo, T_Lo); | |
| 1650 _eor(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1651 _mov(DestHi, T_Hi); | |
| 1652 return; | |
| 1653 case InstArithmetic::Sub: | |
| 1654 if (SrcsLo.swappedOperands()) { | |
| 1655 _rsbs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
| 1656 _mov(DestLo, T_Lo); | |
| 1657 _rsc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1658 _mov(DestHi, T_Hi); | |
| 1659 } else { | |
| 1660 _subs(T_Lo, SrcsLo.src0R(this), SrcsLo.src1RF(this)); | |
| 1661 _mov(DestLo, T_Lo); | |
| 1662 _sbc(T_Hi, SrcsHi.src0R(this), SrcsHi.src1RF(this)); | |
| 1663 _mov(DestHi, T_Hi); | |
| 1664 } | |
| 1665 return; | |
| 1666 case InstArithmetic::Mul: { | |
| 1667 // GCC 4.8 does: | |
| 1668 // a=b*c ==> | |
| 1669 // t_acc =(mul) (b.lo * c.hi) | |
| 1670 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
| 1671 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1672 // t.hi += t_acc | |
| 1673 // a.lo = t.lo | |
| 1674 // a.hi = t.hi | |
| 1675 // | |
| 1676 // LLVM does: | |
| 1677 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1678 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
| 1679 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
| 1680 // a.lo = t.lo | |
| 1681 // a.hi = t.hi | |
| 1682 // | |
| 1683 // LLVM's lowering has fewer instructions, but more register pressure: | |
| 1684 // t.lo is live from beginning to end, while GCC delays the two-dest | |
| 1685 // instruction till the end, and kills c.hi immediately. | |
| 1686 Variable *T_Acc = makeReg(IceType_i32); | |
| 1687 Variable *T_Acc1 = makeReg(IceType_i32); | |
| 1688 Variable *T_Hi1 = makeReg(IceType_i32); | |
| 1689 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); | |
| 1690 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | |
| 1691 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | |
| 1692 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this); | |
| 1693 _mul(T_Acc, Src0RLo, Src1RHi); | |
| 1694 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
| 1695 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
| 1696 _add(T_Hi, T_Hi1, T_Acc1); | |
| 1697 _mov(DestLo, T_Lo); | |
| 1698 _mov(DestHi, T_Hi); | |
| 1699 return; | |
| 1700 } | |
| 1701 case InstArithmetic::Shl: { | |
| 1702 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { | |
| 1703 Variable *Src0RLo = SrcsLo.src0R(this); | |
| 1704 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
| 1705 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; | |
| 1706 if (ShAmtImm == 0) { | |
| 1707 _mov(DestLo, Src0RLo); | |
| 1708 _mov(DestHi, SrcsHi.src0R(this)); | |
| 1709 return; | |
| 1710 } | |
| 1711 | |
| 1712 if (ShAmtImm >= 32) { | |
| 1713 if (ShAmtImm == 32) { | |
| 1714 _mov(DestHi, Src0RLo); | |
| 1715 } else { | |
| 1716 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
| 1717 Legal_Reg | Legal_Flex); | |
| 1718 _lsl(T_Hi, Src0RLo, ShAmtOp); | |
| 1719 _mov(DestHi, T_Hi); | |
| 1720 } | |
| 1721 | |
| 1722 Operand *_0 = | |
| 1723 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | |
| 1724 _mov(T_Lo, _0); | |
| 1725 _mov(DestLo, T_Lo); | |
| 1726 return; | |
| 1727 } | |
| 1728 | |
| 1729 Variable *Src0RHi = SrcsHi.src0R(this); | |
| 1730 Operand *ShAmtOp = | |
| 1731 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
| 1732 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
| 1733 Legal_Reg | Legal_Flex); | |
| 1734 _lsl(T_Hi, Src0RHi, ShAmtOp); | |
| 1735 _orr(T_Hi, T_Hi, | |
| 1736 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | |
| 1737 OperandARM32::LSR, ComplShAmtOp)); | |
| 1738 _mov(DestHi, T_Hi); | |
| 1739 | |
| 1740 _lsl(T_Lo, Src0RLo, ShAmtOp); | |
| 1741 _mov(DestLo, T_Lo); | |
| 1742 return; | |
| 1743 } | |
| 1744 | |
| 1745 // a=b<<c ==> | |
| 1746 // pnacl-llc does: | |
| 1747 // mov t_b.lo, b.lo | |
| 1748 // mov t_b.hi, b.hi | |
| 1749 // mov t_c.lo, c.lo | |
| 1750 // rsb T0, t_c.lo, #32 | |
| 1751 // lsr T1, t_b.lo, T0 | |
| 1752 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
| 1753 // sub T2, t_c.lo, #32 | |
| 1754 // cmp T2, #0 | |
| 1755 // lslge t_a.hi, t_b.lo, T2 | |
| 1756 // lsl t_a.lo, t_b.lo, t_c.lo | |
| 1757 // mov a.lo, t_a.lo | |
| 1758 // mov a.hi, t_a.hi | |
| 1759 // | |
| 1760 // GCC 4.8 does: | |
| 1761 // sub t_c1, c.lo, #32 | |
| 1762 // lsl t_hi, b.hi, c.lo | |
| 1763 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
| 1764 // rsb t_c2, c.lo, #32 | |
| 1765 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
| 1766 // lsl t_lo, b.lo, c.lo | |
| 1767 // a.lo = t_lo | |
| 1768 // a.hi = t_hi | |
| 1769 // | |
| 1770 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1771 // Can be strength-reduced for constant-shifts, but we don't do that for | |
| 1772 // now. | |
| 1773 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
| 1774 // ARM, shifts only take the lower 8 bits of the shift register, and | |
| 1775 // saturate to the range 0-32, so the negative value will saturate to 32. | |
| 1776 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | |
|
Jim Stichnoth
2015/11/16 23:06:26
Maybe this should be named _32RF?
John
2015/11/17 22:17:05
I'd rather not. This is the number 32, not somethi
| |
| 1777 Operand *_0 = | |
| 1778 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | |
| 1779 Variable *T0 = makeReg(IceType_i32); | |
| 1780 Variable *T1 = makeReg(IceType_i32); | |
| 1781 Variable *T2 = makeReg(IceType_i32); | |
| 1782 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1783 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1784 Variable *Src0RLo = SrcsLo.src0R(this); | |
| 1785 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | |
| 1786 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | |
| 1787 _rsb(T0, Src1RLo, _32); | |
| 1788 _lsr(T1, Src0RLo, T0); | |
| 1789 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1790 OperandARM32::LSL, Src1RLo)); | |
| 1791 _sub(T2, Src1RLo, _32); | |
| 1792 _cmp(T2, _0); | |
| 1793 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
| 1794 _set_dest_redefined(); | |
| 1795 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
| 1796 _mov(DestLo, TA_Lo); | |
| 1797 _mov(DestHi, TA_Hi); | |
| 1798 return; | |
| 1799 } | |
| 1800 case InstArithmetic::Lshr: | |
| 1801 case InstArithmetic::Ashr: { | |
| 1802 const bool ASR = Op == InstArithmetic::Ashr; | |
| 1803 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { | |
| 1804 Variable *Src0RHi = SrcsHi.src0R(this); | |
| 1805 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
| 1806 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; | |
| 1807 if (ShAmtImm == 0) { | |
| 1808 _mov(DestHi, Src0RHi); | |
| 1809 _mov(DestLo, SrcsLo.src0R(this)); | |
| 1810 return; | |
| 1811 } | |
| 1812 | |
| 1813 if (ShAmtImm >= 32) { | |
| 1814 if (ShAmtImm == 32) { | |
| 1815 _mov(DestLo, Src0RHi); | |
| 1816 } else { | |
| 1817 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
| 1818 Legal_Reg | Legal_Flex); | |
| 1819 if (ASR) { | |
| 1820 _asr(T_Lo, Src0RHi, ShAmtOp); | |
| 1821 } else { | |
| 1822 _lsr(T_Lo, Src0RHi, ShAmtOp); | |
| 1823 } | |
| 1824 _mov(DestLo, T_Lo); | |
| 1825 } | |
| 1826 | |
| 1827 if (ASR) { | |
| 1828 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32), | |
| 1829 Legal_Reg | Legal_Flex); | |
| 1830 _asr(T_Hi, Src0RHi, _31); | |
| 1831 } else { | |
| 1832 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), | |
| 1833 Legal_Reg | Legal_Flex); | |
| 1834 _mov(T_Hi, _0); | |
| 1835 } | |
| 1836 _mov(DestHi, T_Hi); | |
| 1837 return; | |
| 1838 } | |
| 1839 | |
| 1840 Variable *Src0RLo = SrcsLo.src0R(this); | |
| 1841 Operand *ShAmtOp = | |
| 1842 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
| 1843 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
| 1844 Legal_Reg | Legal_Flex); | |
| 1845 _lsr(T_Lo, Src0RLo, ShAmtOp); | |
| 1846 _orr(T_Lo, T_Lo, | |
| 1847 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1848 OperandARM32::LSL, ComplShAmtOp)); | |
| 1849 _mov(DestLo, T_Lo); | |
| 1850 | |
| 1851 if (ASR) { | |
| 1852 _asr(T_Hi, Src0RHi, ShAmtOp); | |
| 1853 } else { | |
| 1854 _lsr(T_Hi, Src0RHi, ShAmtOp); | |
| 1855 } | |
| 1856 _mov(DestHi, T_Hi); | |
| 1857 return; | |
| 1858 } | |
| 1859 | |
| 1860 // a=b>>c | |
| 1861 // pnacl-llc does: | |
| 1862 // mov t_b.lo, b.lo | |
| 1863 // mov t_b.hi, b.hi | |
| 1864 // mov t_c.lo, c.lo | |
| 1865 // lsr T0, t_b.lo, t_c.lo | |
| 1866 // rsb T1, t_c.lo, #32 | |
| 1867 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
| 1868 // sub T2, t_c.lo, #32 | |
| 1869 // cmp T2, #0 | |
| 1870 // [al]srge t_a.lo, t_b.hi, T2 | |
| 1871 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
| 1872 // mov a.lo, t_a.lo | |
| 1873 // mov a.hi, t_a.hi | |
| 1874 // | |
| 1875 // GCC 4.8 does (lsr): | |
| 1876 // rsb t_c1, c.lo, #32 | |
| 1877 // lsr t_lo, b.lo, c.lo | |
| 1878 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
| 1879 // sub t_c2, c.lo, #32 | |
| 1880 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
| 1881 // lsr t_hi, b.hi, c.lo | |
| 1882 // mov a.lo, t_lo | |
| 1883 // mov a.hi, t_hi | |
| 1884 // | |
| 1885 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1886 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | |
| 1887 Operand *_0 = | |
| 1888 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | |
| 1889 Variable *T0 = makeReg(IceType_i32); | |
| 1890 Variable *T1 = makeReg(IceType_i32); | |
| 1891 Variable *T2 = makeReg(IceType_i32); | |
| 1892 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1893 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1894 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); | |
| 1895 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); | |
| 1896 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); | |
| 1897 _lsr(T0, Src0RLo, Src1RLo); | |
| 1898 _rsb(T1, Src1RLo, _32); | |
| 1899 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1900 OperandARM32::LSL, T1)); | |
| 1901 _sub(T2, Src1RLo, _32); | |
| 1902 _cmp(T2, _0); | |
| 1903 if (ASR) { | |
| 1904 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1905 _set_dest_redefined(); | |
| 1906 _asr(TA_Hi, Src0RHi, Src1RLo); | |
| 1907 } else { | |
| 1908 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1909 _set_dest_redefined(); | |
| 1910 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
| 1911 } | |
| 1912 _mov(DestLo, TA_Lo); | |
| 1913 _mov(DestHi, TA_Hi); | |
| 1914 return; | |
| 1915 } | |
| 1916 case InstArithmetic::Fadd: | |
| 1917 case InstArithmetic::Fsub: | |
| 1918 case InstArithmetic::Fmul: | |
| 1919 case InstArithmetic::Fdiv: | |
| 1920 case InstArithmetic::Frem: | |
| 1921 llvm::report_fatal_error("FP instruction with i64 type"); | |
| 1922 return; | |
| 1923 case InstArithmetic::Udiv: | |
| 1924 case InstArithmetic::Sdiv: | |
| 1925 case InstArithmetic::Urem: | |
| 1926 case InstArithmetic::Srem: | |
| 1927 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " | |
| 1928 "should have already been handled before"); | |
| 1929 return; | |
| 1930 } | |
| 1931 } | |
| 1932 | |
| 1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1933 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| 1408 Variable *Dest = Inst->getDest(); | 1934 Variable *Dest = Inst->getDest(); |
| 1409 if (Dest->getType() == IceType_i1) { | 1935 if (Dest->getType() == IceType_i1) { |
| 1410 lowerInt1Arithmetic(Inst); | 1936 lowerInt1Arithmetic(Inst); |
| 1411 return; | 1937 return; |
| 1412 } | 1938 } |
| 1413 | 1939 |
| 1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to | |
| 1415 // legalize Src0 to flex or Src1 to flex and there is a reversible | |
| 1416 // instruction. E.g., reverse subtract with immediate, register vs register, | |
| 1417 // immediate. | |
| 1418 // Or it may be the case that the operands aren't swapped, but the bits can | |
| 1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) | |
| 1420 // instead of AND for some masks. | |
| 1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1940 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1941 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| 1423 if (Dest->getType() == IceType_i64) { | 1942 if (Dest->getType() == IceType_i64) { |
| 1424 // These helper-call-involved instructions are lowered in this separate | 1943 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
| 1425 // switch. This is because we would otherwise assume that we need to | 1944 return; |
| 1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | 1945 } |
| 1427 // helper calls, and such unused/redundant instructions will fail liveness | 1946 |
| 1428 // analysis under -Om1 setting. | 1947 if (isVectorType(Dest->getType())) { |
| 1429 switch (Inst->getOp()) { | |
| 1430 default: | |
| 1431 break; | |
| 1432 case InstArithmetic::Udiv: | |
| 1433 case InstArithmetic::Sdiv: | |
| 1434 case InstArithmetic::Urem: | |
| 1435 case InstArithmetic::Srem: { | |
| 1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
| 1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
| 1438 // register, which will hide a constant source operand. Instead, check | |
| 1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
| 1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
| 1441 if (C64->getValue() == 0) { | |
| 1442 _trap(); | |
| 1443 return; | |
| 1444 } | |
| 1445 } else { | |
| 1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1448 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
| 1449 } | |
| 1450 // Technically, ARM has their own aeabi routines, but we can use the | |
| 1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
| 1452 // the more standard __moddi3 for rem. | |
| 1453 const char *HelperName = ""; | |
| 1454 switch (Inst->getOp()) { | |
| 1455 default: | |
| 1456 llvm_unreachable("Should have only matched div ops."); | |
| 1457 break; | |
| 1458 case InstArithmetic::Udiv: | |
| 1459 HelperName = H_udiv_i64; | |
| 1460 break; | |
| 1461 case InstArithmetic::Sdiv: | |
| 1462 HelperName = H_sdiv_i64; | |
| 1463 break; | |
| 1464 case InstArithmetic::Urem: | |
| 1465 HelperName = H_urem_i64; | |
| 1466 break; | |
| 1467 case InstArithmetic::Srem: | |
| 1468 HelperName = H_srem_i64; | |
| 1469 break; | |
| 1470 } | |
| 1471 constexpr SizeT MaxSrcs = 2; | |
| 1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 1473 Call->addArg(Src0); | |
| 1474 Call->addArg(Src1); | |
| 1475 lowerCall(Call); | |
| 1476 return; | |
| 1477 } | |
| 1478 } | |
| 1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1483 Operand *Src1Lo = loOperand(Src1); | |
| 1484 Operand *Src1Hi = hiOperand(Src1); | |
| 1485 Variable *T_Lo = makeReg(DestLo->getType()); | |
| 1486 Variable *T_Hi = makeReg(DestHi->getType()); | |
| 1487 switch (Inst->getOp()) { | |
| 1488 case InstArithmetic::_num: | |
| 1489 llvm_unreachable("Unknown arithmetic operator"); | |
| 1490 return; | |
| 1491 case InstArithmetic::Add: | |
| 1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1494 _adds(T_Lo, Src0RLo, Src1Lo); | |
| 1495 _mov(DestLo, T_Lo); | |
| 1496 _adc(T_Hi, Src0RHi, Src1Hi); | |
| 1497 _mov(DestHi, T_Hi); | |
| 1498 return; | |
| 1499 case InstArithmetic::And: | |
| 1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1502 _and(T_Lo, Src0RLo, Src1Lo); | |
| 1503 _mov(DestLo, T_Lo); | |
| 1504 _and(T_Hi, Src0RHi, Src1Hi); | |
| 1505 _mov(DestHi, T_Hi); | |
| 1506 return; | |
| 1507 case InstArithmetic::Or: | |
| 1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1510 _orr(T_Lo, Src0RLo, Src1Lo); | |
| 1511 _mov(DestLo, T_Lo); | |
| 1512 _orr(T_Hi, Src0RHi, Src1Hi); | |
| 1513 _mov(DestHi, T_Hi); | |
| 1514 return; | |
| 1515 case InstArithmetic::Xor: | |
| 1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1518 _eor(T_Lo, Src0RLo, Src1Lo); | |
| 1519 _mov(DestLo, T_Lo); | |
| 1520 _eor(T_Hi, Src0RHi, Src1Hi); | |
| 1521 _mov(DestHi, T_Hi); | |
| 1522 return; | |
| 1523 case InstArithmetic::Sub: | |
| 1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1526 _subs(T_Lo, Src0RLo, Src1Lo); | |
| 1527 _mov(DestLo, T_Lo); | |
| 1528 _sbc(T_Hi, Src0RHi, Src1Hi); | |
| 1529 _mov(DestHi, T_Hi); | |
| 1530 return; | |
| 1531 case InstArithmetic::Mul: { | |
| 1532 // GCC 4.8 does: | |
| 1533 // a=b*c ==> | |
| 1534 // t_acc =(mul) (b.lo * c.hi) | |
| 1535 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
| 1536 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1537 // t.hi += t_acc | |
| 1538 // a.lo = t.lo | |
| 1539 // a.hi = t.hi | |
| 1540 // | |
| 1541 // LLVM does: | |
| 1542 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1543 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
| 1544 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
| 1545 // a.lo = t.lo | |
| 1546 // a.hi = t.hi | |
| 1547 // | |
| 1548 // LLVM's lowering has fewer instructions, but more register pressure: | |
| 1549 // t.lo is live from beginning to end, while GCC delays the two-dest | |
| 1550 // instruction till the end, and kills c.hi immediately. | |
| 1551 Variable *T_Acc = makeReg(IceType_i32); | |
| 1552 Variable *T_Acc1 = makeReg(IceType_i32); | |
| 1553 Variable *T_Hi1 = makeReg(IceType_i32); | |
| 1554 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1555 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
| 1556 _mul(T_Acc, Src0RLo, Src1RHi); | |
| 1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
| 1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
| 1559 _add(T_Hi, T_Hi1, T_Acc1); | |
| 1560 _mov(DestLo, T_Lo); | |
| 1561 _mov(DestHi, T_Hi); | |
| 1562 return; | |
| 1563 } | |
| 1564 case InstArithmetic::Shl: { | |
| 1565 // a=b<<c ==> | |
| 1566 // pnacl-llc does: | |
| 1567 // mov t_b.lo, b.lo | |
| 1568 // mov t_b.hi, b.hi | |
| 1569 // mov t_c.lo, c.lo | |
| 1570 // rsb T0, t_c.lo, #32 | |
| 1571 // lsr T1, t_b.lo, T0 | |
| 1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
| 1573 // sub T2, t_c.lo, #32 | |
| 1574 // cmp T2, #0 | |
| 1575 // lslge t_a.hi, t_b.lo, T2 | |
| 1576 // lsl t_a.lo, t_b.lo, t_c.lo | |
| 1577 // mov a.lo, t_a.lo | |
| 1578 // mov a.hi, t_a.hi | |
| 1579 // | |
| 1580 // GCC 4.8 does: | |
| 1581 // sub t_c1, c.lo, #32 | |
| 1582 // lsl t_hi, b.hi, c.lo | |
| 1583 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
| 1584 // rsb t_c2, c.lo, #32 | |
| 1585 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
| 1586 // lsl t_lo, b.lo, c.lo | |
| 1587 // a.lo = t_lo | |
| 1588 // a.hi = t_hi | |
| 1589 // | |
| 1590 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1591 // Can be strength-reduced for constant-shifts, but we don't do that for | |
| 1592 // now. | |
| 1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
| 1594 // ARM, shifts only take the lower 8 bits of the shift register, and | |
| 1595 // saturate to the range 0-32, so the negative value will saturate to 32. | |
| 1596 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1597 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1598 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1599 Variable *T0 = makeReg(IceType_i32); | |
| 1600 Variable *T1 = makeReg(IceType_i32); | |
| 1601 Variable *T2 = makeReg(IceType_i32); | |
| 1602 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1603 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1604 _rsb(T0, Src1RLo, _32); | |
| 1605 _lsr(T1, Src0RLo, T0); | |
| 1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1607 OperandARM32::LSL, Src1RLo)); | |
| 1608 _sub(T2, Src1RLo, _32); | |
| 1609 _cmp(T2, _0); | |
| 1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
| 1611 _set_dest_redefined(); | |
| 1612 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
| 1613 _mov(DestLo, TA_Lo); | |
| 1614 _mov(DestHi, TA_Hi); | |
| 1615 return; | |
| 1616 } | |
| 1617 case InstArithmetic::Lshr: | |
| 1618 case InstArithmetic::Ashr: { | |
| 1619 // a=b>>c | |
| 1620 // pnacl-llc does: | |
| 1621 // mov t_b.lo, b.lo | |
| 1622 // mov t_b.hi, b.hi | |
| 1623 // mov t_c.lo, c.lo | |
| 1624 // lsr T0, t_b.lo, t_c.lo | |
| 1625 // rsb T1, t_c.lo, #32 | |
| 1626 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
| 1627 // sub T2, t_c.lo, #32 | |
| 1628 // cmp T2, #0 | |
| 1629 // [al]srge t_a.lo, t_b.hi, T2 | |
| 1630 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
| 1631 // mov a.lo, t_a.lo | |
| 1632 // mov a.hi, t_a.hi | |
| 1633 // | |
| 1634 // GCC 4.8 does (lsr): | |
| 1635 // rsb t_c1, c.lo, #32 | |
| 1636 // lsr t_lo, b.lo, c.lo | |
| 1637 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
| 1638 // sub t_c2, c.lo, #32 | |
| 1639 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
| 1640 // lsr t_hi, b.hi, c.lo | |
| 1641 // mov a.lo, t_lo | |
| 1642 // mov a.hi, t_hi | |
| 1643 // | |
| 1644 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | |
| 1646 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1647 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1648 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1649 Variable *T0 = makeReg(IceType_i32); | |
| 1650 Variable *T1 = makeReg(IceType_i32); | |
| 1651 Variable *T2 = makeReg(IceType_i32); | |
| 1652 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1653 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1654 _lsr(T0, Src0RLo, Src1RLo); | |
| 1655 _rsb(T1, Src1RLo, _32); | |
| 1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1657 OperandARM32::LSL, T1)); | |
| 1658 _sub(T2, Src1RLo, _32); | |
| 1659 _cmp(T2, _0); | |
| 1660 if (IsAshr) { | |
| 1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1662 _set_dest_redefined(); | |
| 1663 _asr(TA_Hi, Src0RHi, Src1RLo); | |
| 1664 } else { | |
| 1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1666 _set_dest_redefined(); | |
| 1667 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
| 1668 } | |
| 1669 _mov(DestLo, TA_Lo); | |
| 1670 _mov(DestHi, TA_Hi); | |
| 1671 return; | |
| 1672 } | |
| 1673 case InstArithmetic::Fadd: | |
| 1674 case InstArithmetic::Fsub: | |
| 1675 case InstArithmetic::Fmul: | |
| 1676 case InstArithmetic::Fdiv: | |
| 1677 case InstArithmetic::Frem: | |
| 1678 llvm_unreachable("FP instruction with i64 type"); | |
| 1679 return; | |
| 1680 case InstArithmetic::Udiv: | |
| 1681 case InstArithmetic::Sdiv: | |
| 1682 case InstArithmetic::Urem: | |
| 1683 case InstArithmetic::Srem: | |
| 1684 llvm_unreachable("Call-helper-involved instruction for i64 type " | |
| 1685 "should have already been handled before"); | |
| 1686 return; | |
| 1687 } | |
| 1688 return; | |
| 1689 } else if (isVectorType(Dest->getType())) { | |
| 1690 // Add a fake def to keep liveness consistent in the meantime. | 1948 // Add a fake def to keep liveness consistent in the meantime. |
| 1691 Variable *T = makeReg(Dest->getType()); | 1949 Variable *T = makeReg(Dest->getType()); |
| 1692 Context.insert(InstFakeDef::create(Func, T)); | 1950 Context.insert(InstFakeDef::create(Func, T)); |
| 1693 _mov(Dest, T); | 1951 _mov(Dest, T); |
| 1694 UnimplementedError(Func->getContext()->getFlags()); | 1952 UnimplementedError(Func->getContext()->getFlags()); |
| 1695 return; | 1953 return; |
| 1696 } | 1954 } |
| 1955 | |
| 1697 // Dest->getType() is a non-i64 scalar. | 1956 // Dest->getType() is a non-i64 scalar. |
| 1698 Variable *Src0R = legalizeToReg(Src0); | |
| 1699 Variable *T = makeReg(Dest->getType()); | 1957 Variable *T = makeReg(Dest->getType()); |
| 1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect | 1958 |
| 1959 // * Handle div/rem separately. They require a non-legalized Src1 to inspect | |
| 1701 // whether or not Src1 is a non-zero constant. Once legalized it is more | 1960 // whether or not Src1 is a non-zero constant. Once legalized it is more |
| 1702 // difficult to determine (constant may be moved to a register). | 1961 // difficult to determine (constant may be moved to a register). |
| 1962 // * Handle floating point arithmetic separately: they require Src1 to be | |
| 1963 // legalized to a register. | |
| 1703 switch (Inst->getOp()) { | 1964 switch (Inst->getOp()) { |
| 1704 default: | 1965 default: |
| 1705 break; | 1966 break; |
| 1706 case InstArithmetic::Udiv: { | 1967 case InstArithmetic::Udiv: { |
| 1707 constexpr bool NotRemainder = false; | 1968 constexpr bool NotRemainder = false; |
| 1969 Variable *Src0R = legalizeToReg(Src0); | |
| 1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 1970 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
| 1709 H_udiv_i32, NotRemainder); | 1971 H_udiv_i32, NotRemainder); |
| 1710 return; | 1972 return; |
| 1711 } | 1973 } |
| 1712 case InstArithmetic::Sdiv: { | 1974 case InstArithmetic::Sdiv: { |
| 1713 constexpr bool NotRemainder = false; | 1975 constexpr bool NotRemainder = false; |
| 1976 Variable *Src0R = legalizeToReg(Src0); | |
| 1714 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 1977 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
| 1715 H_sdiv_i32, NotRemainder); | 1978 H_sdiv_i32, NotRemainder); |
| 1716 return; | 1979 return; |
| 1717 } | 1980 } |
| 1718 case InstArithmetic::Urem: { | 1981 case InstArithmetic::Urem: { |
| 1719 constexpr bool IsRemainder = true; | 1982 constexpr bool IsRemainder = true; |
| 1983 Variable *Src0R = legalizeToReg(Src0); | |
| 1720 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 1984 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
| 1721 H_urem_i32, IsRemainder); | 1985 H_urem_i32, IsRemainder); |
| 1722 return; | 1986 return; |
| 1723 } | 1987 } |
| 1724 case InstArithmetic::Srem: { | 1988 case InstArithmetic::Srem: { |
| 1725 constexpr bool IsRemainder = true; | 1989 constexpr bool IsRemainder = true; |
| 1990 Variable *Src0R = legalizeToReg(Src0); | |
| 1726 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, | 1991 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
| 1727 H_srem_i32, IsRemainder); | 1992 H_srem_i32, IsRemainder); |
| 1728 return; | 1993 return; |
| 1729 } | 1994 } |
| 1730 case InstArithmetic::Frem: { | 1995 case InstArithmetic::Frem: { |
| 1731 const SizeT MaxSrcs = 2; | 1996 constexpr SizeT MaxSrcs = 2; |
| 1997 Variable *Src0R = legalizeToReg(Src0); | |
| 1732 Type Ty = Dest->getType(); | 1998 Type Ty = Dest->getType(); |
| 1733 InstCall *Call = makeHelperCall( | 1999 InstCall *Call = makeHelperCall( |
| 1734 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | 2000 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
| 1735 Call->addArg(Src0R); | 2001 Call->addArg(Src0R); |
| 1736 Call->addArg(Src1); | 2002 Call->addArg(Src1); |
| 1737 lowerCall(Call); | 2003 lowerCall(Call); |
| 1738 return; | 2004 return; |
| 1739 } | 2005 } |
| 1740 } | |
| 1741 | |
| 1742 // Handle floating point arithmetic separately: they require Src1 to be | |
| 1743 // legalized to a register. | |
| 1744 switch (Inst->getOp()) { | |
| 1745 default: | |
| 1746 break; | |
| 1747 case InstArithmetic::Fadd: { | 2006 case InstArithmetic::Fadd: { |
| 2007 Variable *Src0R = legalizeToReg(Src0); | |
| 1748 Variable *Src1R = legalizeToReg(Src1); | 2008 Variable *Src1R = legalizeToReg(Src1); |
| 1749 _vadd(T, Src0R, Src1R); | 2009 _vadd(T, Src0R, Src1R); |
| 1750 _mov(Dest, T); | 2010 _mov(Dest, T); |
| 1751 return; | 2011 return; |
| 1752 } | 2012 } |
| 1753 case InstArithmetic::Fsub: { | 2013 case InstArithmetic::Fsub: { |
| 2014 Variable *Src0R = legalizeToReg(Src0); | |
| 1754 Variable *Src1R = legalizeToReg(Src1); | 2015 Variable *Src1R = legalizeToReg(Src1); |
| 1755 _vsub(T, Src0R, Src1R); | 2016 _vsub(T, Src0R, Src1R); |
| 1756 _mov(Dest, T); | 2017 _mov(Dest, T); |
| 1757 return; | 2018 return; |
| 1758 } | 2019 } |
| 1759 case InstArithmetic::Fmul: { | 2020 case InstArithmetic::Fmul: { |
| 2021 Variable *Src0R = legalizeToReg(Src0); | |
| 1760 Variable *Src1R = legalizeToReg(Src1); | 2022 Variable *Src1R = legalizeToReg(Src1); |
| 1761 _vmul(T, Src0R, Src1R); | 2023 _vmul(T, Src0R, Src1R); |
| 1762 _mov(Dest, T); | 2024 _mov(Dest, T); |
| 1763 return; | 2025 return; |
| 1764 } | 2026 } |
| 1765 case InstArithmetic::Fdiv: { | 2027 case InstArithmetic::Fdiv: { |
| 2028 Variable *Src0R = legalizeToReg(Src0); | |
| 1766 Variable *Src1R = legalizeToReg(Src1); | 2029 Variable *Src1R = legalizeToReg(Src1); |
| 1767 _vdiv(T, Src0R, Src1R); | 2030 _vdiv(T, Src0R, Src1R); |
| 1768 _mov(Dest, T); | 2031 _mov(Dest, T); |
| 1769 return; | 2032 return; |
| 1770 } | 2033 } |
| 1771 } | 2034 } |
| 1772 | 2035 |
| 1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 2036 // Handle everything else here. |
| 2037 Int32Operands Srcs(Src0, Src1); | |
| 1774 switch (Inst->getOp()) { | 2038 switch (Inst->getOp()) { |
| 1775 case InstArithmetic::_num: | 2039 case InstArithmetic::_num: |
| 1776 llvm_unreachable("Unknown arithmetic operator"); | 2040 llvm::report_fatal_error("Unknown arithmetic operator"); |
| 1777 return; | 2041 return; |
| 1778 case InstArithmetic::Add: | 2042 case InstArithmetic::Add: |
| 1779 _add(T, Src0R, Src1RF); | 2043 if (Srcs.hasConstOperand()) { |
| 2044 if (!Srcs.immediateIsFlexEncodable() && | |
| 2045 Srcs.negatedImmediateIsFlexEncodable()) { | |
| 2046 Variable *Src0R = Srcs.src0R(this); | |
| 2047 Operand *Src1F = Srcs.negatedSrc1F(this); | |
| 2048 if (!Srcs.swappedOperands()) { | |
| 2049 _sub(T, Src0R, Src1F); | |
| 2050 } else { | |
| 2051 _rsb(T, Src0R, Src1F); | |
| 2052 } | |
| 2053 _mov(Dest, T); | |
| 2054 return; | |
| 2055 } | |
| 2056 } | |
| 2057 _add(T, Srcs.src0R(this), Srcs.src1RF(this)); | |
| 1780 _mov(Dest, T); | 2058 _mov(Dest, T); |
| 1781 return; | 2059 return; |
| 1782 case InstArithmetic::And: | 2060 case InstArithmetic::And: |
| 1783 _and(T, Src0R, Src1RF); | 2061 if (Srcs.hasConstOperand()) { |
| 2062 if (!Srcs.immediateIsFlexEncodable() && | |
| 2063 Srcs.invertedImmediateIsFlexEncodable()) { | |
| 2064 Variable *Src0R = Srcs.src0R(this); | |
| 2065 Operand *Src1F = Srcs.invertedSrc1F(this); | |
| 2066 _bic(T, Src0R, Src1F); | |
| 2067 _mov(Dest, T); | |
| 2068 return; | |
| 2069 } | |
| 2070 } | |
| 2071 _and(T, Srcs.src0R(this), Srcs.src1RF(this)); | |
| 1784 _mov(Dest, T); | 2072 _mov(Dest, T); |
| 1785 return; | 2073 return; |
| 1786 case InstArithmetic::Or: | 2074 case InstArithmetic::Or: |
| 1787 _orr(T, Src0R, Src1RF); | 2075 _orr(T, Srcs.src0R(this), Srcs.src1RF(this)); |
| 1788 _mov(Dest, T); | 2076 _mov(Dest, T); |
| 1789 return; | 2077 return; |
| 1790 case InstArithmetic::Xor: | 2078 case InstArithmetic::Xor: |
| 1791 _eor(T, Src0R, Src1RF); | 2079 _eor(T, Srcs.src0R(this), Srcs.src1RF(this)); |
| 1792 _mov(Dest, T); | 2080 _mov(Dest, T); |
| 1793 return; | 2081 return; |
| 1794 case InstArithmetic::Sub: | 2082 case InstArithmetic::Sub: |
| 1795 _sub(T, Src0R, Src1RF); | 2083 if (Srcs.hasConstOperand()) { |
| 2084 if (Srcs.immediateIsFlexEncodable()) { | |
| 2085 if (Srcs.swappedOperands()) { | |
| 2086 _rsb(T, Srcs.src0R(this), Srcs.src1RF(this)); | |
| 2087 } else { | |
| 2088 _sub(T, Srcs.src0R(this), Srcs.src1RF(this)); | |
| 2089 } | |
| 2090 _mov(Dest, T); | |
| 2091 return; | |
| 2092 } | |
| 2093 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { | |
| 2094 _add(T, Srcs.src0R(this), Srcs.negatedSrc1F(this)); | |
| 2095 _mov(Dest, T); | |
| 2096 return; | |
| 2097 } | |
| 2098 } | |
| 2099 _sub(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this)); | |
| 1796 _mov(Dest, T); | 2100 _mov(Dest, T); |
| 1797 return; | 2101 return; |
| 1798 case InstArithmetic::Mul: { | 2102 case InstArithmetic::Mul: { |
| 1799 Variable *Src1R = legalizeToReg(Src1RF); | 2103 _mul(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1R(this)); |
| 1800 _mul(T, Src0R, Src1R); | |
| 1801 _mov(Dest, T); | 2104 _mov(Dest, T); |
| 1802 return; | 2105 return; |
| 1803 } | 2106 } |
| 1804 case InstArithmetic::Shl: | 2107 case InstArithmetic::Shl: { |
| 1805 _lsl(T, Src0R, Src1RF); | 2108 _lsl(T, Srcs.unswappedSrc0R(this), Srcs.unswappedSrc1RF(this)); |
| 1806 _mov(Dest, T); | 2109 _mov(Dest, T); |
| 1807 return; | 2110 return; |
| 1808 case InstArithmetic::Lshr: | 2111 } |
| 2112 case InstArithmetic::Lshr: { | |
| 2113 Variable *Src0R = Srcs.unswappedSrc0R(this); | |
| 1809 if (Dest->getType() != IceType_i32) { | 2114 if (Dest->getType() != IceType_i32) { |
| 1810 _uxt(Src0R, Src0R); | 2115 _uxt(Src0R, Src0R); |
| 1811 } | 2116 } |
| 1812 _lsr(T, Src0R, Src1RF); | 2117 _lsr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
| 1813 _mov(Dest, T); | 2118 _mov(Dest, T); |
| 1814 return; | 2119 return; |
| 1815 case InstArithmetic::Ashr: | 2120 } |
| 2121 case InstArithmetic::Ashr: { | |
| 2122 Variable *Src0R = Srcs.unswappedSrc0R(this); | |
| 1816 if (Dest->getType() != IceType_i32) { | 2123 if (Dest->getType() != IceType_i32) { |
| 1817 _sxt(Src0R, Src0R); | 2124 _sxt(Src0R, Src0R); |
| 1818 } | 2125 } |
| 1819 _asr(T, Src0R, Src1RF); | 2126 _asr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
| 1820 _mov(Dest, T); | 2127 _mov(Dest, T); |
| 1821 return; | 2128 return; |
| 2129 } | |
| 1822 case InstArithmetic::Udiv: | 2130 case InstArithmetic::Udiv: |
| 1823 case InstArithmetic::Sdiv: | 2131 case InstArithmetic::Sdiv: |
| 1824 case InstArithmetic::Urem: | 2132 case InstArithmetic::Urem: |
| 1825 case InstArithmetic::Srem: | 2133 case InstArithmetic::Srem: |
| 1826 llvm_unreachable("Integer div/rem should have been handled earlier."); | 2134 llvm::report_fatal_error( |
| 2135 "Integer div/rem should have been handled earlier."); | |
| 1827 return; | 2136 return; |
| 1828 case InstArithmetic::Fadd: | 2137 case InstArithmetic::Fadd: |
| 1829 case InstArithmetic::Fsub: | 2138 case InstArithmetic::Fsub: |
| 1830 case InstArithmetic::Fmul: | 2139 case InstArithmetic::Fmul: |
| 1831 case InstArithmetic::Fdiv: | 2140 case InstArithmetic::Fdiv: |
| 1832 case InstArithmetic::Frem: | 2141 case InstArithmetic::Frem: |
| 1833 llvm_unreachable("Floating point arith should have been handled earlier."); | 2142 llvm::report_fatal_error( |
| 2143 "Floating point arith should have been handled earlier."); | |
| 1834 return; | 2144 return; |
| 1835 } | 2145 } |
| 1836 } | 2146 } |
| 1837 | 2147 |
| 1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { | 2148 void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| 1839 Variable *Dest = Inst->getDest(); | 2149 Variable *Dest = Inst->getDest(); |
| 1840 Operand *Src0 = Inst->getSrc(0); | 2150 Operand *Src0 = Inst->getSrc(0); |
| 1841 assert(Dest->getType() == Src0->getType()); | 2151 assert(Dest->getType() == Src0->getType()); |
| 1842 if (Dest->getType() == IceType_i64) { | 2152 if (Dest->getType() == IceType_i64) { |
| 1843 Src0 = legalizeUndef(Src0); | 2153 Src0 = legalizeUndef(Src0); |
| 2154 | |
| 2155 Variable *T_Lo = makeReg(IceType_i32); | |
| 2156 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2157 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| 1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1848 Variable *T_Lo = makeReg(IceType_i32); | |
| 1849 Variable *T_Hi = makeReg(IceType_i32); | |
| 1850 | |
| 1851 _mov(T_Lo, Src0Lo); | 2158 _mov(T_Lo, Src0Lo); |
| 1852 _mov(DestLo, T_Lo); | 2159 _mov(DestLo, T_Lo); |
| 2160 | |
| 2161 Variable *T_Hi = makeReg(IceType_i32); | |
| 2162 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2163 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 1853 _mov(T_Hi, Src0Hi); | 2164 _mov(T_Hi, Src0Hi); |
| 1854 _mov(DestHi, T_Hi); | 2165 _mov(DestHi, T_Hi); |
| 2166 | |
| 2167 return; | |
| 2168 } | |
| 2169 | |
| 2170 Operand *NewSrc; | |
| 2171 if (Dest->hasReg()) { | |
| 2172 // If Dest already has a physical register, then legalize the Src operand | |
| 2173 // into a Variable with the same register assignment. This especially | |
| 2174 // helps allow the use of Flex operands. | |
| 2175 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
| 1855 } else { | 2176 } else { |
| 1856 Operand *NewSrc; | 2177 // Dest could be a stack operand. Since we could potentially need to do a |
| 1857 if (Dest->hasReg()) { | 2178 // Store (and store can only have Register operands), legalize this to a |
| 1858 // If Dest already has a physical register, then legalize the Src operand | 2179 // register. |
| 1859 // into a Variable with the same register assignment. This especially | 2180 NewSrc = legalize(Src0, Legal_Reg); |
| 1860 // helps allow the use of Flex operands. | |
| 1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
| 1862 } else { | |
| 1863 // Dest could be a stack operand. Since we could potentially need to do a | |
| 1864 // Store (and store can only have Register operands), legalize this to a | |
| 1865 // register. | |
| 1866 NewSrc = legalize(Src0, Legal_Reg); | |
| 1867 } | |
| 1868 if (isVectorType(Dest->getType())) { | |
| 1869 Variable *SrcR = legalizeToReg(NewSrc); | |
| 1870 _mov(Dest, SrcR); | |
| 1871 } else if (isFloatingType(Dest->getType())) { | |
| 1872 Variable *SrcR = legalizeToReg(NewSrc); | |
| 1873 _mov(Dest, SrcR); | |
| 1874 } else { | |
| 1875 _mov(Dest, NewSrc); | |
| 1876 } | |
| 1877 } | 2181 } |
| 2182 | |
| 2183 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { | |
| 2184 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); | |
| 2185 } | |
| 2186 _mov(Dest, NewSrc); | |
| 1878 } | 2187 } |
| 1879 | 2188 |
| 1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 2189 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
| 1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 2190 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
| 1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 2191 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
| 1883 InstARM32Label *NewShortCircuitLabel = nullptr; | 2192 InstARM32Label *NewShortCircuitLabel = nullptr; |
| 1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 2193 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 1885 | 2194 |
| 1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 2195 const Inst *Producer = BoolComputations.getProducerOf(Boolean); |
| 1887 | 2196 |
| (...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2573 struct { | 2882 struct { |
| 2574 CondARM32::Cond CC0; | 2883 CondARM32::Cond CC0; |
| 2575 CondARM32::Cond CC1; | 2884 CondARM32::Cond CC1; |
| 2576 } TableFcmp[] = { | 2885 } TableFcmp[] = { |
| 2577 #define X(val, CC0, CC1) \ | 2886 #define X(val, CC0, CC1) \ |
| 2578 { CondARM32::CC0, CondARM32::CC1 } \ | 2887 { CondARM32::CC0, CondARM32::CC1 } \ |
| 2579 , | 2888 , |
| 2580 FCMPARM32_TABLE | 2889 FCMPARM32_TABLE |
| 2581 #undef X | 2890 #undef X |
| 2582 }; | 2891 }; |
| 2892 | |
| 2893 bool isFloatingPointZero(Operand *Src) { | |
| 2894 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | |
| 2895 return Utils::isPositiveZero(F32->getValue()); | |
| 2896 } | |
| 2897 | |
| 2898 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | |
| 2899 return Utils::isPositiveZero(F64->getValue()); | |
| 2900 } | |
| 2901 | |
| 2902 return false; | |
| 2903 } | |
| 2583 } // end of anonymous namespace | 2904 } // end of anonymous namespace |
| 2584 | 2905 |
| 2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 2906 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
| 2586 InstFcmp::FCond Condition = Instr->getCondition(); | 2907 InstFcmp::FCond Condition = Instr->getCondition(); |
| 2587 switch (Condition) { | 2908 switch (Condition) { |
| 2588 case InstFcmp::False: | 2909 case InstFcmp::False: |
| 2589 return CondWhenTrue(CondARM32::kNone); | 2910 return CondWhenTrue(CondARM32::kNone); |
| 2590 case InstFcmp::True: | 2911 case InstFcmp::True: |
| 2591 return CondWhenTrue(CondARM32::AL); | 2912 return CondWhenTrue(CondARM32::AL); |
| 2592 break; | 2913 break; |
| 2593 default: { | 2914 default: { |
| 2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | 2915 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
| 2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); | 2916 Operand *Src1 = Instr->getSrc(1); |
| 2596 _vcmp(Src0R, Src1R); | 2917 if (isFloatingPointZero(Src1)) { |
| 2918 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); | |
| 2919 } else { | |
| 2920 _vcmp(Src0R, legalizeToReg(Src1)); | |
| 2921 } | |
| 2597 _vmrs(); | 2922 _vmrs(); |
| 2598 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2923 assert(Condition < llvm::array_lengthof(TableFcmp)); |
| 2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); | 2924 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
| 2600 } | 2925 } |
| 2601 } | 2926 } |
| 2602 } | 2927 } |
| 2603 | 2928 |
| 2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | 2929 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
| 2605 Variable *Dest = Instr->getDest(); | 2930 Variable *Dest = Instr->getDest(); |
| 2606 if (isVectorType(Dest->getType())) { | 2931 if (isVectorType(Dest->getType())) { |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 2635 _mov(T, _1, Cond.WhenTrue0); | 2960 _mov(T, _1, Cond.WhenTrue0); |
| 2636 } | 2961 } |
| 2637 | 2962 |
| 2638 if (Cond.WhenTrue1 != CondARM32::kNone) { | 2963 if (Cond.WhenTrue1 != CondARM32::kNone) { |
| 2639 _mov_redefined(T, _1, Cond.WhenTrue1); | 2964 _mov_redefined(T, _1, Cond.WhenTrue1); |
| 2640 } | 2965 } |
| 2641 | 2966 |
| 2642 _mov(Dest, T); | 2967 _mov(Dest, T); |
| 2643 } | 2968 } |
| 2644 | 2969 |
| 2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { | 2970 TargetARM32::CondWhenTrue |
| 2646 assert(Inst->getSrc(0)->getType() != IceType_i1); | 2971 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| 2647 assert(Inst->getSrc(1)->getType() != IceType_i1); | 2972 Operand *Src1) { |
| 2973 size_t Index = static_cast<size_t>(Condition); | |
| 2974 assert(Index < llvm::array_lengthof(TableIcmp64)); | |
| 2648 | 2975 |
| 2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2976 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); |
| 2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2977 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); |
| 2978 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); | |
| 2979 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); | |
| 2980 | |
| 2981 if (SrcsLo.hasConstOperand()) { | |
| 2982 const uint32_t ValueLo = SrcsLo.getConstantValue(); | |
| 2983 const uint32_t ValueHi = SrcsHi.getConstantValue(); | |
| 2984 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo; | |
| 2985 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
| 2986 Value == 0) { | |
| 2987 Variable *T = makeReg(IceType_i32); | |
| 2988 _orrs(T, SrcsLo.src0R(this), SrcsHi.src0R(this)); | |
| 2989 Context.insert(InstFakeUse::create(Func, T)); | |
| 2990 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2991 } | |
| 2992 | |
| 2993 Variable *Src0RLo = SrcsLo.src0R(this); | |
| 2994 Variable *Src0RHi = SrcsHi.src0R(this); | |
| 2995 Operand *Src1RFLo = SrcsLo.src1RF(this); | |
| 2996 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this); | |
| 2997 | |
| 2998 const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands(); | |
| 2999 | |
| 3000 if (UseRsb) { | |
| 3001 if (TableIcmp64[Index].IsSigned) { | |
| 3002 Variable *T = makeReg(IceType_i32); | |
| 3003 _rsbs(T, Src0RLo, Src1RFLo); | |
| 3004 Context.insert(InstFakeUse::create(Func, T)); | |
| 3005 | |
| 3006 T = makeReg(IceType_i32); | |
| 3007 _rscs(T, Src0RHi, Src1RFHi); | |
| 3008 // We need to add a FakeUse here because liveness gets mad at us (Def | |
| 3009 // without Use.) Note that flag-setting instructions are considered to | |
| 3010 // have side effects and, therefore, are not DCE'ed. | |
| 3011 Context.insert(InstFakeUse::create(Func, T)); | |
| 3012 } else { | |
| 3013 Variable *T = makeReg(IceType_i32); | |
| 3014 _rsbs(T, Src0RHi, Src1RFHi); | |
| 3015 Context.insert(InstFakeUse::create(Func, T)); | |
| 3016 | |
| 3017 T = makeReg(IceType_i32); | |
| 3018 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 3019 Context.insert(InstFakeUse::create(Func, T)); | |
| 3020 } | |
| 3021 } else { | |
| 3022 if (TableIcmp64[Index].IsSigned) { | |
| 3023 _cmp(Src0RLo, Src1RFLo); | |
| 3024 Variable *T = makeReg(IceType_i32); | |
| 3025 _sbcs(T, Src0RHi, Src1RFHi); | |
| 3026 Context.insert(InstFakeUse::create(Func, T)); | |
| 3027 } else { | |
| 3028 _cmp(Src0RHi, Src1RFHi); | |
| 3029 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 3030 } | |
| 3031 } | |
| 3032 | |
| 3033 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 3034 } | |
| 3035 | |
| 3036 Variable *Src0RLo, *Src0RHi; | |
| 3037 Operand *Src1RFLo, *Src1RFHi; | |
| 3038 if (TableIcmp64[Index].Swapped) { | |
| 3039 Src0RLo = legalizeToReg(loOperand(Src1)); | |
| 3040 Src0RHi = legalizeToReg(hiOperand(Src1)); | |
| 3041 Src1RFLo = legalizeToReg(loOperand(Src0)); | |
| 3042 Src1RFHi = legalizeToReg(hiOperand(Src0)); | |
| 3043 } else { | |
| 3044 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 3045 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 3046 Src1RFLo = legalizeToReg(loOperand(Src1)); | |
| 3047 Src1RFHi = legalizeToReg(hiOperand(Src1)); | |
| 3048 } | |
| 2651 | 3049 |
| 2652 // a=icmp cond, b, c ==> | 3050 // a=icmp cond, b, c ==> |
| 2653 // GCC does: | 3051 // GCC does: |
| 2654 // cmp b.hi, c.hi or cmp b.lo, c.lo | 3052 // cmp b.hi, c.hi or cmp b.lo, c.lo |
| 2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 3053 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
| 2656 // mov.<C1> t, #1 mov.<C1> t, #1 | 3054 // mov.<C1> t, #1 mov.<C1> t, #1 |
| 2657 // mov.<C2> t, #0 mov.<C2> t, #0 | 3055 // mov.<C2> t, #0 mov.<C2> t, #0 |
| 2658 // mov a, t mov a, t | 3056 // mov a, t mov a, t |
| 2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 3057 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
| 2660 // is used for signed compares. In some cases, b and c need to be swapped as | 3058 // is used for signed compares. In some cases, b and c need to be swapped as |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 2671 // that's nice in that it's just as short but has fewer dependencies for | 3069 // that's nice in that it's just as short but has fewer dependencies for |
| 2672 // better ILP at the cost of more registers. | 3070 // better ILP at the cost of more registers. |
| 2673 // | 3071 // |
| 2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 3072 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
| 2675 // unconditional mov #0, two cmps, two conditional mov #1, and one | 3073 // unconditional mov #0, two cmps, two conditional mov #1, and one |
| 2676 // conditional reg mov. That has few dependencies for good ILP, but is a | 3074 // conditional reg mov. That has few dependencies for good ILP, but is a |
| 2677 // longer sequence. | 3075 // longer sequence. |
| 2678 // | 3076 // |
| 2679 // So, we are going with the GCC version since it's usually better (except | 3077 // So, we are going with the GCC version since it's usually better (except |
| 2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 3078 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
| 3079 if (TableIcmp64[Index].IsSigned) { | |
| 3080 Variable *ScratchReg = makeReg(IceType_i32); | |
| 3081 _cmp(Src0RLo, Src1RFLo); | |
| 3082 _sbcs(ScratchReg, Src0RHi, Src1RFHi); | |
| 3083 // ScratchReg isn't going to be used, but we need the side-effect of | |
| 3084 // setting flags from this operation. | |
| 3085 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
| 3086 } else { | |
| 3087 _cmp(Src0RHi, Src1RFHi); | |
| 3088 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 3089 } | |
| 3090 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 3091 } | |
| 2681 | 3092 |
| 2682 if (Src0->getType() == IceType_i64) { | 3093 TargetARM32::CondWhenTrue |
| 2683 InstIcmp::ICond Conditon = Inst->getCondition(); | 3094 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| 2684 size_t Index = static_cast<size_t>(Conditon); | 3095 Operand *Src1) { |
| 2685 assert(Index < llvm::array_lengthof(TableIcmp64)); | 3096 Int32Operands Srcs(Src0, Src1); |
| 2686 Variable *Src0Lo, *Src0Hi; | 3097 if (!Srcs.hasConstOperand()) { |
| 2687 Operand *Src1LoRF, *Src1HiRF; | 3098 |
| 2688 if (TableIcmp64[Index].Swapped) { | 3099 Variable *Src0R = Srcs.src0R(this); |
| 2689 Src0Lo = legalizeToReg(loOperand(Src1)); | 3100 Operand *Src1RF = Srcs.src1RF(this); |
| 2690 Src0Hi = legalizeToReg(hiOperand(Src1)); | 3101 _cmp(Src0R, Src1RF); |
| 2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 3102 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 2693 } else { | |
| 2694 Src0Lo = legalizeToReg(loOperand(Src0)); | |
| 2695 Src0Hi = legalizeToReg(hiOperand(Src0)); | |
| 2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
| 2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
| 2698 } | |
| 2699 if (TableIcmp64[Index].IsSigned) { | |
| 2700 Variable *ScratchReg = makeReg(IceType_i32); | |
| 2701 _cmp(Src0Lo, Src1LoRF); | |
| 2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | |
| 2703 // ScratchReg isn't going to be used, but we need the side-effect of | |
| 2704 // setting flags from this operation. | |
| 2705 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
| 2706 } else { | |
| 2707 _cmp(Src0Hi, Src1HiRF); | |
| 2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | |
| 2709 } | |
| 2710 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2711 } | 3103 } |
| 2712 | 3104 |
| 3105 Variable *Src0R = Srcs.src0R(this); | |
| 3106 const int32_t Value = Srcs.getConstantValue(); | |
| 3107 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { | |
| 3108 _tst(Src0R, Src0R); | |
| 3109 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3110 } | |
| 3111 | |
| 3112 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() && | |
| 3113 Srcs.negatedImmediateIsFlexEncodable()) { | |
| 3114 Operand *Src1F = Srcs.negatedSrc1F(this); | |
| 3115 _cmn(Src0R, Src1F); | |
| 3116 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3117 } | |
| 3118 | |
| 3119 Operand *Src1RF = Srcs.src1RF(this); | |
| 3120 if (!Srcs.swappedOperands()) { | |
| 3121 _cmp(Src0R, Src1RF); | |
| 3122 } else { | |
| 3123 Variable *T = makeReg(IceType_i32); | |
| 3124 _rsbs(T, Src0R, Src1RF); | |
| 3125 Context.insert(InstFakeUse::create(Func, T)); | |
| 3126 } | |
| 3127 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3128 } | |
| 3129 | |
| 3130 TargetARM32::CondWhenTrue | |
| 3131 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | |
| 3132 Operand *Src1) { | |
| 3133 Int32Operands Srcs(Src0, Src1); | |
| 3134 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); | |
| 3135 assert(ShAmt >= 0); | |
| 3136 | |
| 3137 if (!Srcs.hasConstOperand()) { | |
| 3138 Variable *Src0R = makeReg(IceType_i32); | |
| 3139 Operand *ShAmtF = | |
| 3140 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); | |
| 3141 _lsl(Src0R, legalizeToReg(Src0), ShAmtF); | |
| 3142 | |
| 3143 Variable *Src1R = legalizeToReg(Src1); | |
| 3144 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( | |
| 3145 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); | |
| 3146 _cmp(Src0R, Src1F); | |
| 3147 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3148 } | |
| 3149 | |
| 3150 const int32_t Value = Srcs.getConstantValue(); | |
| 3151 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { | |
| 3152 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); | |
| 3153 Variable *T = makeReg(IceType_i32); | |
| 3154 _lsls(T, Srcs.src0R(this), ShAmtOp); | |
| 3155 Context.insert(InstFakeUse::create(Func, T)); | |
| 3156 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3157 } | |
| 3158 | |
| 3159 Variable *ConstR = makeReg(IceType_i32); | |
| 3160 _mov(ConstR, | |
| 3161 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex)); | |
| 3162 Operand *NonConstF = OperandARM32FlexReg::create( | |
| 3163 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL, | |
| 3164 Ctx->getConstantInt32(ShAmt)); | |
| 3165 | |
| 3166 if (Srcs.swappedOperands()) { | |
| 3167 _cmp(ConstR, NonConstF); | |
| 3168 } else { | |
| 3169 Variable *T = makeReg(IceType_i32); | |
| 3170 _rsbs(T, ConstR, NonConstF); | |
| 3171 Context.insert(InstFakeUse::create(Func, T)); | |
| 3172 } | |
| 3173 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3174 } | |
| 3175 | |
| 3176 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { | |
| 3177 assert(Inst->getSrc(0)->getType() != IceType_i1); | |
| 3178 assert(Inst->getSrc(1)->getType() != IceType_i1); | |
| 3179 | |
| 3180 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | |
| 3181 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | |
| 3182 | |
| 3183 const InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2713 // a=icmp cond b, c ==> | 3184 // a=icmp cond b, c ==> |
| 2714 // GCC does: | 3185 // GCC does: |
| 2715 // <u/s>xtb tb, b | 3186 // <u/s>xtb tb, b |
| 2716 // <u/s>xtb tc, c | 3187 // <u/s>xtb tc, c |
| 2717 // cmp tb, tc | 3188 // cmp tb, tc |
| 2718 // mov.C1 t, #0 | 3189 // mov.C1 t, #0 |
| 2719 // mov.C2 t, #1 | 3190 // mov.C2 t, #1 |
| 2720 // mov a, t | 3191 // mov a, t |
| 2721 // where the unsigned/sign extension is not needed for 32-bit. They also have | 3192 // where the unsigned/sign extension is not needed for 32-bit. They also have |
| 2722 // special cases for EQ and NE. E.g., for NE: | 3193 // special cases for EQ and NE. E.g., for NE: |
| 2723 // <extend to tb, tc> | 3194 // <extend to tb, tc> |
| 2724 // subs t, tb, tc | 3195 // subs t, tb, tc |
| 2725 // movne t, #1 | 3196 // movne t, #1 |
| 2726 // mov a, t | 3197 // mov a, t |
| 2727 // | 3198 // |
| 2728 // LLVM does: | 3199 // LLVM does: |
| 2729 // lsl tb, b, #<N> | 3200 // lsl tb, b, #<N> |
| 2730 // mov t, #0 | 3201 // mov t, #0 |
| 2731 // cmp tb, c, lsl #<N> | 3202 // cmp tb, c, lsl #<N> |
| 2732 // mov.<C> t, #1 | 3203 // mov.<C> t, #1 |
| 2733 // mov a, t | 3204 // mov a, t |
| 2734 // | 3205 // |
| 2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on | 3206 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
| 2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 3207 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
| 2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 3208 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
| 2738 // first. It's not clear to me why that special-casing is needed. | 3209 // first. It's not clear to me why that special-casing is needed. |
| 2739 // | 3210 // |
| 2740 // We'll go with the LLVM way for now, since it's shorter and has just as few | 3211 // We'll go with the LLVM way for now, since it's shorter and has just as few |
| 2741 // dependencies. | 3212 // dependencies. |
| 2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 3213 switch (Src0->getType()) { |
| 2743 assert(ShiftAmt >= 0); | 3214 default: |
| 2744 Constant *ShiftConst = nullptr; | 3215 llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); |
| 2745 Variable *Src0R = nullptr; | 3216 case IceType_i8: |
| 2746 if (ShiftAmt) { | 3217 case IceType_i16: |
| 2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 3218 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); |
| 2748 Src0R = makeReg(IceType_i32); | 3219 case IceType_i32: |
| 2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 3220 return lowerInt32IcmpCond(Condition, Src0, Src1); |
| 2750 } else { | 3221 case IceType_i64: |
| 2751 Src0R = legalizeToReg(Src0); | 3222 return lowerInt64IcmpCond(Condition, Src0, Src1); |
| 2752 } | 3223 } |
| 2753 if (ShiftAmt) { | |
| 2754 Variable *Src1R = legalizeToReg(Src1); | |
| 2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | |
| 2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | |
| 2757 _cmp(Src0R, Src1RShifted); | |
| 2758 } else { | |
| 2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | |
| 2760 _cmp(Src0R, Src1RF); | |
| 2761 } | |
| 2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); | |
| 2763 } | 3224 } |
| 2764 | 3225 |
| 2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 3226 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| 2766 Variable *Dest = Inst->getDest(); | 3227 Variable *Dest = Inst->getDest(); |
| 2767 | 3228 |
| 2768 if (isVectorType(Dest->getType())) { | 3229 if (isVectorType(Dest->getType())) { |
| 2769 Variable *T = makeReg(Dest->getType()); | 3230 Variable *T = makeReg(Dest->getType()); |
| 2770 Context.insert(InstFakeDef::create(Func, T)); | 3231 Context.insert(InstFakeDef::create(Func, T)); |
| 2771 _mov(Dest, T); | 3232 _mov(Dest, T); |
| 2772 UnimplementedError(Func->getContext()->getFlags()); | 3233 UnimplementedError(Func->getContext()->getFlags()); |
| (...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4247 } | 4708 } |
| 4248 return Reg; | 4709 return Reg; |
| 4249 } | 4710 } |
| 4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 4711 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
| 4251 Variable *Reg = makeReg(Ty, RegNum); | 4712 Variable *Reg = makeReg(Ty, RegNum); |
| 4252 _movw(Reg, C); | 4713 _movw(Reg, C); |
| 4253 _movt(Reg, C); | 4714 _movt(Reg, C); |
| 4254 return Reg; | 4715 return Reg; |
| 4255 } else { | 4716 } else { |
| 4256 assert(isScalarFloatingType(Ty)); | 4717 assert(isScalarFloatingType(Ty)); |
| 4718 uint32_t ModifiedImm; | |
| 4719 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | |
| 4720 Variable *T = makeReg(Ty, RegNum); | |
| 4721 _mov(T, | |
| 4722 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | |
| 4723 return T; | |
| 4724 } | |
| 4725 | |
| 4726 if (Ty == IceType_f64 && isFloatingPointZero(From)) { | |
| 4727 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | |
| 4728 // because ARM does not have a veor instruction with S registers. | |
| 4729 Variable *T = makeReg(IceType_f64, RegNum); | |
| 4730 Context.insert(InstFakeDef::create(Func, T)); | |
| 4731 _veor(T, T, T); | |
| 4732 return T; | |
| 4733 } | |
| 4734 | |
| 4257 // Load floats/doubles from literal pool. | 4735 // Load floats/doubles from literal pool. |
| 4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an | |
| 4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified | |
| 4260 // immediate constants". Or, for 32-bit floating point numbers, just | |
| 4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG | |
| 4262 // instead of using a movw/movt pair to get the const-pool address then | |
| 4263 // loading to SREG. | |
| 4264 std::string Buffer; | 4736 std::string Buffer; |
| 4265 llvm::raw_string_ostream StrBuf(Buffer); | 4737 llvm::raw_string_ostream StrBuf(Buffer); |
| 4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 4738 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
| 4267 llvm::cast<Constant>(From)->setShouldBePooled(true); | 4739 llvm::cast<Constant>(From)->setShouldBePooled(true); |
| 4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 4740 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
| 4269 Variable *BaseReg = makeReg(getPointerType()); | 4741 Variable *BaseReg = makeReg(getPointerType()); |
| 4270 _movw(BaseReg, Offset); | 4742 _movw(BaseReg, Offset); |
| 4271 _movt(BaseReg, Offset); | 4743 _movt(BaseReg, Offset); |
| 4272 From = formMemoryOperand(BaseReg, Ty); | 4744 From = formMemoryOperand(BaseReg, Ty); |
| 4273 return copyToReg(From, RegNum); | 4745 return copyToReg(From, RegNum); |
| (...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 5371 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 4900 // However, for compatibility with current NaCl LLVM, don't claim that. | 5372 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 5373 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 4902 } | 5374 } |
| 4903 | 5375 |
| 4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 5376 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
| 4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 5377 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 4906 llvm::SmallBitVector TargetARM32::ScratchRegs; | 5378 llvm::SmallBitVector TargetARM32::ScratchRegs; |
| 4907 | 5379 |
| 4908 } // end of namespace Ice | 5380 } // end of namespace Ice |
| OLD | NEW |