Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes the lit tests. Double is too precise. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1296 return; 1296 return;
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); 1297 Variable *SrcLoReg = legalizeToReg(SrcLo);
1298 switch (Ty) { 1298 switch (Ty) {
1299 default: 1299 default:
1300 llvm_unreachable("Unexpected type"); 1300 llvm::report_fatal_error("Unexpected type");
1301 case IceType_i8: { 1301 case IceType_i8:
1302 Operand *Mask =
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
1304 _tst(SrcLoReg, Mask);
1305 break;
1306 }
1307 case IceType_i16: { 1302 case IceType_i16: {
1308 Operand *Mask = 1303 Operand *ShAmtF =
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
1310 _tst(SrcLoReg, Mask); 1305 Legal_Reg | Legal_Flex);
1311 break; 1306 Variable *T = makeReg(IceType_i32);
1312 } 1307 _lsls(T, SrcLoReg, ShAmtF);
1308 Context.insert(InstFakeUse::create(Func, T));
1309 } break;
1313 case IceType_i32: { 1310 case IceType_i32: {
1314 _tst(SrcLoReg, SrcLoReg); 1311 _tst(SrcLoReg, SrcLoReg);
1315 break; 1312 break;
1316 } 1313 }
1317 case IceType_i64: { 1314 case IceType_i64: {
1318 Variable *ScratchReg = makeReg(IceType_i32); 1315 Variable *T = makeReg(IceType_i32);
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
1320 // ScratchReg isn't going to be used, but we need the side-effect of 1317 // T isn't going to be used, but we need the side-effect of setting flags
1321 // setting flags from this operation. 1318 // from this operation.
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1319 Context.insert(InstFakeUse::create(Func, T));
1323 } 1320 }
1324 } 1321 }
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); 1322 InstARM32Label *Label = InstARM32Label::create(Func, this);
1326 _br(Label, CondARM32::NE); 1323 _br(Label, CondARM32::NE);
1327 _trap(); 1324 _trap();
1328 Context.insert(Label); 1325 Context.insert(Label);
1329 } 1326 }
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
1404 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1405 Variable *Dest, Operand *Src0,
1406 Operand *Src1) {
1407 // These helper-call-involved instructions are lowered in this separate
1408 // switch. This is because we would otherwise assume that we need to
1409 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1410 // helper calls, and such unused/redundant instructions will fail liveness
1411 // analysis under -Om1 setting.
1412 switch (Op) {
1413 default:
1414 break;
1415 case InstArithmetic::Udiv:
1416 case InstArithmetic::Sdiv:
1417 case InstArithmetic::Urem:
1418 case InstArithmetic::Srem: {
1419 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1420 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1421 // register, which will hide a constant source operand. Instead, check
1422 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1423 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1424 if (C64->getValue() == 0) {
1425 _trap();
1426 return;
1427 }
1428 } else {
1429 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1430 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1431 div0Check(IceType_i64, Src1Lo, Src1Hi);
1432 }
 1433     // Technically, ARM has its own aeabi routines, but we can use the
sehr 2015/11/13 21:56:29 either "has its" or "have their".
John 2015/11/14 00:00:38 For a moment I thought this was Jim. :) Done.
1434 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1435 // the more standard __moddi3 for rem.
1436 const char *HelperName = "";
1437 switch (Op) {
1438 default:
1439 llvm::report_fatal_error("Should have only matched div ops.");
1440 break;
1441 case InstArithmetic::Udiv:
1442 HelperName = H_udiv_i64;
1443 break;
1444 case InstArithmetic::Sdiv:
1445 HelperName = H_sdiv_i64;
1446 break;
1447 case InstArithmetic::Urem:
1448 HelperName = H_urem_i64;
1449 break;
1450 case InstArithmetic::Srem:
1451 HelperName = H_srem_i64;
1452 break;
1453 }
1454 constexpr SizeT MaxSrcs = 2;
1455 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1456 Call->addArg(Src0);
1457 Call->addArg(Src1);
1458 lowerCall(Call);
1459 return;
1460 }
1461 }
1462
1463 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1464 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1465 Variable *Src0RLo = nullptr;
1466 Variable *Src0RHi = nullptr;
 1467   // Src0Hi is not always used for Shl, and Src0Lo is not always used for Lshr.
Jim Stichnoth 2015/11/16 13:56:10 s/got/for/ ? Lshr
1468 if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) {
1469 Src0RLo = legalizeToReg(loOperand(Src0));
1470 }
1471 if (Op != InstArithmetic::Shl) {
1472 Src0RHi = legalizeToReg(hiOperand(Src0));
1473 }
1474 Operand *Src1Lo = loOperand(Src1);
1475 Operand *Src1Hi = hiOperand(Src1);
1476 Variable *T_Lo = makeReg(DestLo->getType());
1477 Variable *T_Hi = makeReg(DestHi->getType());
1478
1479 switch (Op) {
1480 case InstArithmetic::_num:
1481 llvm::report_fatal_error("Unknown arithmetic operator");
1482 return;
1483 case InstArithmetic::Add:
1484 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1485 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1486 _adds(T_Lo, Src0RLo, Src1Lo);
1487 _mov(DestLo, T_Lo);
1488 _adc(T_Hi, Src0RHi, Src1Hi);
1489 _mov(DestHi, T_Hi);
1490 return;
1491 case InstArithmetic::And:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _and(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _and(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::Or:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _orr(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _orr(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Xor:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _eor(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _eor(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Sub:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _subs(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _sbc(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Mul: {
1524 // GCC 4.8 does:
1525 // a=b*c ==>
1526 // t_acc =(mul) (b.lo * c.hi)
1527 // t_acc =(mla) (c.lo * b.hi) + t_acc
1528 // t.hi,t.lo =(umull) b.lo * c.lo
1529 // t.hi += t_acc
1530 // a.lo = t.lo
1531 // a.hi = t.hi
1532 //
1533 // LLVM does:
1534 // t.hi,t.lo =(umull) b.lo * c.lo
1535 // t.hi =(mla) (b.lo * c.hi) + t.hi
1536 // t.hi =(mla) (b.hi * c.lo) + t.hi
1537 // a.lo = t.lo
1538 // a.hi = t.hi
1539 //
1540 // LLVM's lowering has fewer instructions, but more register pressure:
1541 // t.lo is live from beginning to end, while GCC delays the two-dest
1542 // instruction till the end, and kills c.hi immediately.
1543 Variable *T_Acc = makeReg(IceType_i32);
1544 Variable *T_Acc1 = makeReg(IceType_i32);
1545 Variable *T_Hi1 = makeReg(IceType_i32);
1546 Variable *Src1RLo = legalizeToReg(Src1Lo);
1547 Variable *Src1RHi = legalizeToReg(Src1Hi);
1548 _mul(T_Acc, Src0RLo, Src1RHi);
1549 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1550 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1551 _add(T_Hi, T_Hi1, T_Acc1);
1552 _mov(DestLo, T_Lo);
1553 _mov(DestHi, T_Hi);
1554 return;
1555 }
1556 case InstArithmetic::Shl: {
1557 assert(Src0RLo != nullptr);
1558 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1559 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1560 const int32_t ShAmtImm = C->getValue() & 0x3F;
1561 if (ShAmtImm == 0) {
1562 Src0RHi = legalizeToReg(hiOperand(Src0));
1563 _mov(DestLo, Src0RLo);
1564 _mov(DestHi, Src0RHi);
1565 return;
1566 }
1567
1568 if (ShAmtImm >= 32) {
1569 if (ShAmtImm == 32) {
1570 _mov(DestHi, Src0RLo);
1571 } else {
1572 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1573 Legal_Reg | Legal_Flex);
1574 _lsl(T_Hi, Src0RLo, ShAmtOp);
1575 _mov(DestHi, T_Hi);
1576 }
1577
1578 Operand *_0 =
1579 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1580 _mov(T_Lo, _0);
1581 _mov(DestLo, T_Lo);
1582 return;
1583 }
1584
1585 Src0RHi = legalizeToReg(hiOperand(Src0));
1586 Operand *ShAmtOp =
1587 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1588 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1589 Legal_Reg | Legal_Flex);
1590 _lsl(T_Hi, Src0RHi, ShAmtOp);
1591 _orr(T_Hi, T_Hi,
1592 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1593 OperandARM32::LSR, ComplShAmtOp));
1594 _mov(DestHi, T_Hi);
1595
1596 _lsl(T_Lo, Src0RLo, ShAmtOp);
1597 _mov(DestLo, T_Lo);
1598 return;
1599 }
1600
1601 // a=b<<c ==>
1602 // pnacl-llc does:
1603 // mov t_b.lo, b.lo
1604 // mov t_b.hi, b.hi
1605 // mov t_c.lo, c.lo
1606 // rsb T0, t_c.lo, #32
1607 // lsr T1, t_b.lo, T0
1608 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1609 // sub T2, t_c.lo, #32
1610 // cmp T2, #0
1611 // lslge t_a.hi, t_b.lo, T2
1612 // lsl t_a.lo, t_b.lo, t_c.lo
1613 // mov a.lo, t_a.lo
1614 // mov a.hi, t_a.hi
1615 //
1616 // GCC 4.8 does:
1617 // sub t_c1, c.lo, #32
1618 // lsl t_hi, b.hi, c.lo
1619 // orr t_hi, t_hi, b.lo, lsl t_c1
1620 // rsb t_c2, c.lo, #32
1621 // orr t_hi, t_hi, b.lo, lsr t_c2
1622 // lsl t_lo, b.lo, c.lo
1623 // a.lo = t_lo
1624 // a.hi = t_hi
1625 //
1626 // These are incompatible, therefore we mimic pnacl-llc.
1627 // Can be strength-reduced for constant-shifts, but we don't do that for
1628 // now.
1629 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1630 // ARM, shifts only take the lower 8 bits of the shift register, and
1631 // saturate to the range 0-32, so the negative value will saturate to 32.
1632 Constant *_32 = Ctx->getConstantInt32(32);
1633 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1634 Src0RHi = legalizeToReg(hiOperand(Src0));
1635 Variable *Src1RLo = legalizeToReg(Src1Lo);
1636 Variable *T0 = makeReg(IceType_i32);
1637 Variable *T1 = makeReg(IceType_i32);
1638 Variable *T2 = makeReg(IceType_i32);
1639 Variable *TA_Hi = makeReg(IceType_i32);
1640 Variable *TA_Lo = makeReg(IceType_i32);
1641 _rsb(T0, Src1RLo, _32);
1642 _lsr(T1, Src0RLo, T0);
1643 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1644 OperandARM32::LSL, Src1RLo));
1645 _sub(T2, Src1RLo, _32);
1646 _cmp(T2, _0);
1647 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1648 _set_dest_redefined();
1649 _lsl(TA_Lo, Src0RLo, Src1RLo);
1650 _mov(DestLo, TA_Lo);
1651 _mov(DestHi, TA_Hi);
1652 return;
1653 }
1654 case InstArithmetic::Lshr:
1655 case InstArithmetic::Ashr: {
1656 assert(Src0RHi != nullptr);
1657 const bool ASR = Op == InstArithmetic::Ashr;
1658 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1659 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1660 const int32_t ShAmtImm = C->getValue() & 0x3F;
1661 if (ShAmtImm == 0) {
1662 Src0RLo = legalizeToReg(loOperand(Src0));
1663 _mov(DestLo, Src0RLo);
1664 _mov(DestHi, Src0RHi);
1665 return;
1666 }
1667
1668 if (ShAmtImm >= 32) {
1669 if (ShAmtImm == 32) {
1670 _mov(DestLo, Src0RHi);
1671 } else {
1672 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1673 Legal_Reg | Legal_Flex);
1674 if (ASR) {
1675 _asr(T_Lo, Src0RHi, ShAmtOp);
1676 } else {
1677 _lsr(T_Lo, Src0RHi, ShAmtOp);
1678 }
1679 _mov(DestLo, T_Lo);
1680 }
1681
1682 if (ASR) {
1683 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1684 Legal_Reg | Legal_Flex);
1685 _asr(T_Hi, Src0RHi, _31);
1686 } else {
1687 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1688 Legal_Reg | Legal_Flex);
1689 _mov(T_Hi, _0);
1690 }
1691 _mov(DestHi, T_Hi);
1692 return;
1693 }
1694
1695 Src0RLo = legalizeToReg(loOperand(Src0));
1696 Operand *ShAmtOp =
1697 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1698 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1699 Legal_Reg | Legal_Flex);
1700 _lsr(T_Lo, Src0RLo, ShAmtOp);
1701 _orr(T_Lo, T_Lo,
1702 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1703 OperandARM32::LSL, ComplShAmtOp));
1704 _mov(DestLo, T_Lo);
1705
1706 if (ASR) {
1707 _asr(T_Hi, Src0RHi, ShAmtOp);
1708 } else {
1709 _lsr(T_Hi, Src0RHi, ShAmtOp);
1710 }
1711 _mov(DestHi, T_Hi);
1712 return;
1713 }
1714
1715 // a=b>>c
1716 // pnacl-llc does:
1717 // mov t_b.lo, b.lo
1718 // mov t_b.hi, b.hi
1719 // mov t_c.lo, c.lo
1720 // lsr T0, t_b.lo, t_c.lo
1721 // rsb T1, t_c.lo, #32
1722 // orr t_a.lo, T0, t_b.hi, lsl T1
1723 // sub T2, t_c.lo, #32
1724 // cmp T2, #0
1725 // [al]srge t_a.lo, t_b.hi, T2
1726 // [al]sr t_a.hi, t_b.hi, t_c.lo
1727 // mov a.lo, t_a.lo
1728 // mov a.hi, t_a.hi
1729 //
1730 // GCC 4.8 does (lsr):
1731 // rsb t_c1, c.lo, #32
1732 // lsr t_lo, b.lo, c.lo
1733 // orr t_lo, t_lo, b.hi, lsl t_c1
1734 // sub t_c2, c.lo, #32
1735 // orr t_lo, t_lo, b.hi, lsr t_c2
1736 // lsr t_hi, b.hi, c.lo
1737 // mov a.lo, t_lo
1738 // mov a.hi, t_hi
1739 //
1740 // These are incompatible, therefore we mimic pnacl-llc.
1741 const bool IsAshr = Op == InstArithmetic::Ashr;
1742 Constant *_32 = Ctx->getConstantInt32(32);
1743 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1744 Src0RLo = legalizeToReg(loOperand(Src0));
1745 Variable *Src1RLo = legalizeToReg(Src1Lo);
1746 Variable *T0 = makeReg(IceType_i32);
1747 Variable *T1 = makeReg(IceType_i32);
1748 Variable *T2 = makeReg(IceType_i32);
1749 Variable *TA_Lo = makeReg(IceType_i32);
1750 Variable *TA_Hi = makeReg(IceType_i32);
1751 _lsr(T0, Src0RLo, Src1RLo);
1752 _rsb(T1, Src1RLo, _32);
1753 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1754 OperandARM32::LSL, T1));
1755 _sub(T2, Src1RLo, _32);
1756 _cmp(T2, _0);
1757 if (IsAshr) {
1758 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1759 _set_dest_redefined();
1760 _asr(TA_Hi, Src0RHi, Src1RLo);
1761 } else {
1762 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1763 _set_dest_redefined();
1764 _lsr(TA_Hi, Src0RHi, Src1RLo);
1765 }
1766 _mov(DestLo, TA_Lo);
1767 _mov(DestHi, TA_Hi);
1768 return;
1769 }
1770 case InstArithmetic::Fadd:
1771 case InstArithmetic::Fsub:
1772 case InstArithmetic::Fmul:
1773 case InstArithmetic::Fdiv:
1774 case InstArithmetic::Frem:
1775 llvm::report_fatal_error("FP instruction with i64 type");
1776 return;
1777 case InstArithmetic::Udiv:
1778 case InstArithmetic::Sdiv:
1779 case InstArithmetic::Urem:
1780 case InstArithmetic::Srem:
1781 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1782 "should have already been handled before");
1783 return;
1784 }
1785 }
1786
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1787 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1788 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1789 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1790 lowerInt1Arithmetic(Inst);
1411 return; 1791 return;
1412 } 1792 }
1413 1793
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to 1794 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible 1795 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register, 1796 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate. 1797 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can 1798 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) 1799 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks. 1800 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1801 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1802 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1803 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1804 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1427 // helper calls, and such unused/redundant instructions will fail liveness
1428 // analysis under -Om1 setting.
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return; 1805 return;
1689 } else if (isVectorType(Dest->getType())) { 1806 }
1807
1808 if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1809 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1810 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1811 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1812 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1813 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1814 return;
1696 } 1815 }
1816
1697 // Dest->getType() is a non-i64 scalar. 1817 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0); 1818 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1819 Variable *T = makeReg(Dest->getType());
1820
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1821 // Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1822 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1823 // difficult to determine (constant may be moved to a register).
1703 switch (Inst->getOp()) { 1824 switch (Inst->getOp()) {
1704 default: 1825 default:
1705 break; 1826 break;
1706 case InstArithmetic::Udiv: { 1827 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1828 constexpr bool NotRemainder = false;
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1829 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1830 H_udiv_i32, NotRemainder);
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1766 Variable *Src1R = legalizeToReg(Src1); 1887 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 1888 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 1889 _mov(Dest, T);
1769 return; 1890 return;
1770 } 1891 }
1771 } 1892 }
1772 1893
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 1894 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1774 switch (Inst->getOp()) { 1895 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 1896 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 1897 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 1898 return;
1778 case InstArithmetic::Add: 1899 case InstArithmetic::Add:
1779 _add(T, Src0R, Src1RF); 1900 _add(T, Src0R, Src1RF);
1780 _mov(Dest, T); 1901 _mov(Dest, T);
1781 return; 1902 return;
1782 case InstArithmetic::And: 1903 case InstArithmetic::And:
1783 _and(T, Src0R, Src1RF); 1904 _and(T, Src0R, Src1RF);
1784 _mov(Dest, T); 1905 _mov(Dest, T);
1785 return; 1906 return;
1786 case InstArithmetic::Or: 1907 case InstArithmetic::Or:
(...skipping 29 matching lines...) Expand all
1816 if (Dest->getType() != IceType_i32) { 1937 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 1938 _sxt(Src0R, Src0R);
1818 } 1939 }
1819 _asr(T, Src0R, Src1RF); 1940 _asr(T, Src0R, Src1RF);
1820 _mov(Dest, T); 1941 _mov(Dest, T);
1821 return; 1942 return;
1822 case InstArithmetic::Udiv: 1943 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 1944 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 1945 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 1946 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 1947 llvm::report_fatal_error(
1948 "Integer div/rem should have been handled earlier.");
1827 return; 1949 return;
1828 case InstArithmetic::Fadd: 1950 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 1951 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 1952 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 1953 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 1954 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 1955 llvm::report_fatal_error(
1956 "Floating point arith should have been handled earlier.");
1834 return; 1957 return;
1835 } 1958 }
1836 } 1959 }
1837 1960
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { 1961 void TargetARM32::lowerAssign(const InstAssign *Inst) {
1839 Variable *Dest = Inst->getDest(); 1962 Variable *Dest = Inst->getDest();
1840 Operand *Src0 = Inst->getSrc(0); 1963 Operand *Src0 = Inst->getSrc(0);
1841 assert(Dest->getType() == Src0->getType()); 1964 assert(Dest->getType() == Src0->getType());
1842 if (Dest->getType() == IceType_i64) { 1965 if (Dest->getType() == IceType_i64) {
1843 Src0 = legalizeUndef(Src0); 1966 Src0 = legalizeUndef(Src0);
1967
1968 Variable *T_Lo = makeReg(IceType_i32);
1969 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 1970 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1848 Variable *T_Lo = makeReg(IceType_i32);
1849 Variable *T_Hi = makeReg(IceType_i32);
1850
1851 _mov(T_Lo, Src0Lo); 1971 _mov(T_Lo, Src0Lo);
1852 _mov(DestLo, T_Lo); 1972 _mov(DestLo, T_Lo);
1973
1974 Variable *T_Hi = makeReg(IceType_i32);
1975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1976 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1853 _mov(T_Hi, Src0Hi); 1977 _mov(T_Hi, Src0Hi);
1854 _mov(DestHi, T_Hi); 1978 _mov(DestHi, T_Hi);
1979
1980 return;
1981 }
1982
1983 Operand *NewSrc;
1984 if (Dest->hasReg()) {
1985 // If Dest already has a physical register, then legalize the Src operand
1986 // into a Variable with the same register assignment. This especially
1987 // helps allow the use of Flex operands.
1988 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1855 } else { 1989 } else {
1856 Operand *NewSrc; 1990 // Dest could be a stack operand. Since we could potentially need to do a
1857 if (Dest->hasReg()) { 1991 // Store (and store can only have Register operands), legalize this to a
1858 // If Dest already has a physical register, then legalize the Src operand 1992 // register.
1859 // into a Variable with the same register assignment. This especially 1993 NewSrc = legalize(Src0, Legal_Reg);
1860 // helps allow the use of Flex operands.
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1862 } else {
1863 // Dest could be a stack operand. Since we could potentially need to do a
1864 // Store (and store can only have Register operands), legalize this to a
1865 // register.
1866 NewSrc = legalize(Src0, Legal_Reg);
1867 }
1868 if (isVectorType(Dest->getType())) {
1869 Variable *SrcR = legalizeToReg(NewSrc);
1870 _mov(Dest, SrcR);
1871 } else if (isFloatingType(Dest->getType())) {
1872 Variable *SrcR = legalizeToReg(NewSrc);
1873 _mov(Dest, SrcR);
1874 } else {
1875 _mov(Dest, NewSrc);
1876 }
1877 } 1994 }
1995
1996 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
1997 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
1998 }
1999 _mov(Dest, NewSrc);
1878 } 2000 }
1879 2001
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2002 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2003 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2004 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2005 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2006 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2007
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2008 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2009
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
2573 struct { 2695 struct {
2574 CondARM32::Cond CC0; 2696 CondARM32::Cond CC0;
2575 CondARM32::Cond CC1; 2697 CondARM32::Cond CC1;
2576 } TableFcmp[] = { 2698 } TableFcmp[] = {
2577 #define X(val, CC0, CC1) \ 2699 #define X(val, CC0, CC1) \
2578 { CondARM32::CC0, CondARM32::CC1 } \ 2700 { CondARM32::CC0, CondARM32::CC1 } \
2579 , 2701 ,
2580 FCMPARM32_TABLE 2702 FCMPARM32_TABLE
2581 #undef X 2703 #undef X
2582 }; 2704 };
2705
sehr 2015/11/13 21:56:29 Is there a more common place for this sort of func
John 2015/11/13 22:00:41 Maybe. If you think it's useful, you could add fro
John 2015/11/14 00:00:38 Oh, I thought this was Jim. He had the same routin
2706 bool isFloatingPointZero(Operand *Src) {
2707 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2708 return F32->getValue() == 0.0f;
2709 }
2710
2711 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2712 return F64->getValue() == 0.0;
2713 }
2714
2715 return false;
2716 }
2583 } // end of anonymous namespace 2717 } // end of anonymous namespace
2584 2718
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 2719 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
2586 InstFcmp::FCond Condition = Instr->getCondition(); 2720 InstFcmp::FCond Condition = Instr->getCondition();
2587 switch (Condition) { 2721 switch (Condition) {
2588 case InstFcmp::False: 2722 case InstFcmp::False:
2589 return CondWhenTrue(CondARM32::kNone); 2723 return CondWhenTrue(CondARM32::kNone);
2590 case InstFcmp::True: 2724 case InstFcmp::True:
2591 return CondWhenTrue(CondARM32::AL); 2725 return CondWhenTrue(CondARM32::AL);
2592 break; 2726 break;
2593 default: { 2727 default: {
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); 2728 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); 2729 Operand *Src1 = Instr->getSrc(1);
2596 _vcmp(Src0R, Src1R); 2730 if (isFloatingPointZero(Src1)) {
2731 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
2732 } else {
2733 _vcmp(Src0R, legalizeToReg(Src1));
2734 }
2597 _vmrs(); 2735 _vmrs();
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); 2736 assert(Condition < llvm::array_lengthof(TableFcmp));
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); 2737 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
2600 } 2738 }
2601 } 2739 }
2602 } 2740 }
2603 2741
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2742 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2743 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2744 if (isVectorType(Dest->getType())) {
(...skipping 27 matching lines...) Expand all
2634 } else { 2772 } else {
2635 _mov(T, _1, Cond.WhenTrue0); 2773 _mov(T, _1, Cond.WhenTrue0);
2636 } 2774 }
2637 2775
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 2776 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 2777 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 2778 }
2641 2779
2642 _mov(Dest, T); 2780 _mov(Dest, T);
2643 } 2781 }
2782 TargetARM32::CondWhenTrue
2783 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2784 Operand *Src1) {
2785 size_t Index = static_cast<size_t>(Condition);
2786 assert(Index < llvm::array_lengthof(TableIcmp64));
2644 2787
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { 2788 Operand *NonConstOp = nullptr;
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); 2789 uint64_t Value;
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); 2790 if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2791 Value = C->getValue();
2792 NonConstOp = Src0;
2793 } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) {
2794 Value = C->getValue();
2795 NonConstOp = Src1;
2796 }
2648 2797
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2798 Variable *Src0RLo, *Src0RHi;
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2799 Operand *Src1RFLo, *Src1RFHi;
2800
2801 if (NonConstOp != nullptr) {
2802 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
2803 Value == 0) {
2804 Variable *T = makeReg(IceType_i32);
2805 _orrs(T, legalizeToReg(loOperand(NonConstOp)),
2806 legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex));
2807 Context.insert(InstFakeUse::create(Func, T));
2808 return CondWhenTrue(TableIcmp64[Index].C1);
2809 }
2810
2811 Src0RLo = legalizeToReg(loOperand(NonConstOp));
2812 Src0RHi = legalizeToReg(hiOperand(NonConstOp));
2813 if ((Value >> 32) == (Value & 0xFFFFFFFF)) {
2814 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
2815 Legal_Reg | Legal_Flex);
2816 Src1RFHi = Src1RFLo;
2817 } else {
2818 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
2819 Legal_Reg | Legal_Flex);
2820 Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF),
2821 Legal_Reg | Legal_Flex);
2822 }
2823
2824 bool UseRsb = false;
2825 if (TableIcmp64[Index].Swapped) {
2826 UseRsb = NonConstOp == Src0;
2827 } else {
2828 UseRsb = NonConstOp == Src1;
2829 }
2830
2831 if (UseRsb) {
2832 if (TableIcmp64[Index].IsSigned) {
2833 Variable *T = makeReg(IceType_i32);
2834 _rsbs(T, Src0RLo, Src1RFLo);
2835 Context.insert(InstFakeUse::create(Func, T));
2836
2837 T = makeReg(IceType_i32);
2838 _rscs(T, Src0RHi, Src1RFHi);
2839 // We need to add a FakeUse here because liveness gets mad at us (Def
2840 // without Use.) Note that flag-setting instructions are considered to
2841 // have side effects and, therefore, are not DCE'ed.
2842 Context.insert(InstFakeUse::create(Func, T));
2843 } else {
2844 Variable *T = makeReg(IceType_i32);
2845 _rsbs(T, Src0RHi, Src1RFHi);
2846 Context.insert(InstFakeUse::create(Func, T));
2847
2848 T = makeReg(IceType_i32);
2849 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
2850 Context.insert(InstFakeUse::create(Func, T));
2851 }
2852 } else {
2853 if (TableIcmp64[Index].IsSigned) {
2854 _cmp(Src0RLo, Src1RFLo);
2855 Variable *T = makeReg(IceType_i32);
2856 _sbcs(T, Src0RHi, Src1RFHi);
2857 Context.insert(InstFakeUse::create(Func, T));
2858 } else {
2859 _cmp(Src0RHi, Src1RFHi);
2860 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
2861 }
2862 }
2863
2864 return CondWhenTrue(TableIcmp64[Index].C1);
2865 }
2866
2867 if (TableIcmp64[Index].Swapped) {
2868 Src0RLo = legalizeToReg(loOperand(Src1));
2869 Src0RHi = legalizeToReg(hiOperand(Src1));
2870 Src1RFLo = legalizeToReg(loOperand(Src0));
2871 Src1RFHi = legalizeToReg(hiOperand(Src0));
2872 } else {
2873 Src0RLo = legalizeToReg(loOperand(Src0));
2874 Src0RHi = legalizeToReg(hiOperand(Src0));
2875 Src1RFLo = legalizeToReg(loOperand(Src1));
2876 Src1RFHi = legalizeToReg(hiOperand(Src1));
2877 }
2651 2878
2652 // a=icmp cond, b, c ==> 2879 // a=icmp cond, b, c ==>
2653 // GCC does: 2880 // GCC does:
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo 2881 // cmp b.hi, c.hi or cmp b.lo, c.lo
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 2882 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2656 // mov.<C1> t, #1 mov.<C1> t, #1 2883 // mov.<C1> t, #1 mov.<C1> t, #1
2657 // mov.<C2> t, #0 mov.<C2> t, #0 2884 // mov.<C2> t, #0 mov.<C2> t, #0
2658 // mov a, t mov a, t 2885 // mov a, t mov a, t
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 2886 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2660 // is used for signed compares. In some cases, b and c need to be swapped as 2887 // is used for signed compares. In some cases, b and c need to be swapped as
(...skipping 10 matching lines...) Expand all
2671 // that's nice in that it's just as short but has fewer dependencies for 2898 // that's nice in that it's just as short but has fewer dependencies for
2672 // better ILP at the cost of more registers. 2899 // better ILP at the cost of more registers.
2673 // 2900 //
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 2901 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one 2902 // unconditional mov #0, two cmps, two conditional mov #1, and one
2676 // conditional reg mov. That has few dependencies for good ILP, but is a 2903 // conditional reg mov. That has few dependencies for good ILP, but is a
2677 // longer sequence. 2904 // longer sequence.
2678 // 2905 //
2679 // So, we are going with the GCC version since it's usually better (except 2906 // So, we are going with the GCC version since it's usually better (except
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 2907 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
2908 if (TableIcmp64[Index].IsSigned) {
2909 Variable *ScratchReg = makeReg(IceType_i32);
2910 _cmp(Src0RLo, Src1RFLo);
2911 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
2912 // ScratchReg isn't going to be used, but we need the side-effect of
2913 // setting flags from this operation.
2914 Context.insert(InstFakeUse::create(Func, ScratchReg));
2915 } else {
2916 _cmp(Src0RHi, Src1RFHi);
2917 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
2918 }
2919 return CondWhenTrue(TableIcmp64[Index].C1);
2920 }
2681 2921
2682 if (Src0->getType() == IceType_i64) { 2922 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
2683 InstIcmp::ICond Conditon = Inst->getCondition(); 2923 assert(Inst->getSrc(0)->getType() != IceType_i1);
2684 size_t Index = static_cast<size_t>(Conditon); 2924 assert(Inst->getSrc(1)->getType() != IceType_i1);
2685 assert(Index < llvm::array_lengthof(TableIcmp64));
2686 Variable *Src0Lo, *Src0Hi;
2687 Operand *Src1LoRF, *Src1HiRF;
2688 if (TableIcmp64[Index].Swapped) {
2689 Src0Lo = legalizeToReg(loOperand(Src1));
2690 Src0Hi = legalizeToReg(hiOperand(Src1));
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2693 } else {
2694 Src0Lo = legalizeToReg(loOperand(Src0));
2695 Src0Hi = legalizeToReg(hiOperand(Src0));
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2698 }
2699 if (TableIcmp64[Index].IsSigned) {
2700 Variable *ScratchReg = makeReg(IceType_i32);
2701 _cmp(Src0Lo, Src1LoRF);
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2703 // ScratchReg isn't going to be used, but we need the side-effect of
2704 // setting flags from this operation.
2705 Context.insert(InstFakeUse::create(Func, ScratchReg));
2706 } else {
2707 _cmp(Src0Hi, Src1HiRF);
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2709 }
2710 return CondWhenTrue(TableIcmp64[Index].C1);
2711 }
2712 2925
2926 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2927 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
2928
2929 InstIcmp::ICond Condition = Inst->getCondition();
2713 // a=icmp cond b, c ==> 2930 // a=icmp cond b, c ==>
2714 // GCC does: 2931 // GCC does:
2715 // <u/s>xtb tb, b 2932 // <u/s>xtb tb, b
2716 // <u/s>xtb tc, c 2933 // <u/s>xtb tc, c
2717 // cmp tb, tc 2934 // cmp tb, tc
2718 // mov.C1 t, #0 2935 // mov.C1 t, #0
2719 // mov.C2 t, #1 2936 // mov.C2 t, #1
2720 // mov a, t 2937 // mov a, t
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have 2938 // where the unsigned/sign extension is not needed for 32-bit. They also have
2722 // special cases for EQ and NE. E.g., for NE: 2939 // special cases for EQ and NE. E.g., for NE:
2723 // <extend to tb, tc> 2940 // <extend to tb, tc>
2724 // subs t, tb, tc 2941 // subs t, tb, tc
2725 // movne t, #1 2942 // movne t, #1
2726 // mov a, t 2943 // mov a, t
2727 // 2944 //
2728 // LLVM does: 2945 // LLVM does:
2729 // lsl tb, b, #<N> 2946 // lsl tb, b, #<N>
2730 // mov t, #0 2947 // mov t, #0
2731 // cmp tb, c, lsl #<N> 2948 // cmp tb, c, lsl #<N>
2732 // mov.<C> t, #1 2949 // mov.<C> t, #1
2733 // mov a, t 2950 // mov a, t
2734 // 2951 //
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on 2952 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 2953 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb 2954 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2738 // first. It's not clear to me why that special-casing is needed. 2955 // first. It's not clear to me why that special-casing is needed.
2739 // 2956 //
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few 2957 // We'll go with the LLVM way for now, since it's shorter and has just as few
2741 // dependencies. 2958 // dependencies.
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 2959 Operand *NonConstOp = nullptr;
2743 assert(ShiftAmt >= 0); 2960 int32_t Value;
2744 Constant *ShiftConst = nullptr; 2961 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2745 Variable *Src0R = nullptr; 2962 Value = C->getValue();
2746 if (ShiftAmt) { 2963 NonConstOp = Src0;
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 2964 } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) {
2748 Src0R = makeReg(IceType_i32); 2965 Value = C->getValue();
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 2966 NonConstOp = Src1;
2750 } else {
2751 Src0R = legalizeToReg(Src0);
2752 } 2967 }
2753 if (ShiftAmt) { 2968
2969 switch (Src0->getType()) {
2970 default:
2971 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
2972 case IceType_i64:
2973 return lowerInt64IcmpCond(Condition, Src0, Src1);
2974 case IceType_i8:
2975 case IceType_i16: {
2976 int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
2977 assert(ShAmt >= 0);
2978
2979 if (NonConstOp != nullptr) {
2980 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
2981 Value == 0) {
2982 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
2983 Variable *T = makeReg(IceType_i32);
2984 _lsls(T, legalizeToReg(NonConstOp), ShAmtOp);
2985 Context.insert(InstFakeUse::create(Func, T));
2986 return CondWhenTrue(getIcmp32Mapping(Condition));
2987 }
2988 Variable *ConstR = makeReg(IceType_i32);
2989 _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt),
2990 Legal_Reg | Legal_Flex));
2991 Operand *NonConstF = OperandARM32FlexReg::create(
2992 Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL,
2993 Ctx->getConstantInt32(ShAmt));
2994
2995 if (Src1 == NonConstOp) {
2996 _cmp(ConstR, NonConstF);
2997 } else {
2998 Variable *T = makeReg(IceType_i32);
2999 _rsbs(T, ConstR, NonConstF);
3000 Context.insert(InstFakeUse::create(Func, T));
3001 }
3002 return CondWhenTrue(getIcmp32Mapping(Condition));
3003 }
3004
3005 Variable *Src0R = makeReg(IceType_i32);
3006 Operand *ShAmtF =
3007 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
3008 _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
3009
2754 Variable *Src1R = legalizeToReg(Src1); 3010 Variable *Src1R = legalizeToReg(Src1);
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( 3011 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); 3012 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
2757 _cmp(Src0R, Src1RShifted); 3013 _cmp(Src0R, Src1F);
2758 } else { 3014 return CondWhenTrue(getIcmp32Mapping(Condition));
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2760 _cmp(Src0R, Src1RF);
2761 } 3015 }
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); 3016 case IceType_i32: {
3017 if (NonConstOp != nullptr) {
3018 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
3019 Value == 0) {
3020 Variable *T = makeReg(IceType_i32);
3021 Variable *OpR = legalizeToReg(NonConstOp);
3022 _orrs(T, OpR, OpR);
3023 Context.insert(InstFakeUse::create(Func, T));
3024 return CondWhenTrue(getIcmp32Mapping(Condition));
3025 }
3026
3027 Operand *ConstRF =
3028 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
3029 Variable *NonConstR = legalizeToReg(NonConstOp);
3030
3031 if (Src0 == NonConstOp) {
3032 _cmp(NonConstR, ConstRF);
3033 } else {
3034 Variable *T = makeReg(IceType_i32);
3035 _rsbs(T, NonConstR, ConstRF);
3036 Context.insert(InstFakeUse::create(Func, T));
3037 }
3038 return CondWhenTrue(getIcmp32Mapping(Condition));
3039 }
3040
3041 Variable *Src0R = legalizeToReg(Src0);
3042 Variable *Src1R = legalizeToReg(Src1);
3043 _cmp(Src0R, Src1R);
3044 return CondWhenTrue(getIcmp32Mapping(Condition));
3045 }
3046 }
2763 } 3047 }
2764 3048
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3049 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3050 Variable *Dest = Inst->getDest();
2767 3051
2768 if (isVectorType(Dest->getType())) { 3052 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3053 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3054 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3055 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3056 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4531 }
4248 return Reg; 4532 return Reg;
4249 } 4533 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4534 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4535 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4536 _movw(Reg, C);
4253 _movt(Reg, C); 4537 _movt(Reg, C);
4254 return Reg; 4538 return Reg;
4255 } else { 4539 } else {
4256 assert(isScalarFloatingType(Ty)); 4540 assert(isScalarFloatingType(Ty));
4541 uint32_t ModifiedImm;
4542 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4543 Variable *T = makeReg(Ty, RegNum);
4544 _mov(T,
4545 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4546 return T;
4547 }
4548
4257 // Load floats/doubles from literal pool. 4549 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4550 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4551 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4552 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4553 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4554 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4555 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4556 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4557 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4558 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4559 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5185 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5186 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5187 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5188 }
4903 5189
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5190 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5191 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5192 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5193
4908 } // end of namespace Ice 5194 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698