OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1279 matching lines...) | |
1290 } | 1290 } |
1291 _mov(Dest, SP); | 1291 _mov(Dest, SP); |
1292 } | 1292 } |
1293 | 1293 |
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { | 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) | 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) |
1296 return; | 1296 return; |
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); | 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); |
1298 switch (Ty) { | 1298 switch (Ty) { |
1299 default: | 1299 default: |
1300 llvm_unreachable("Unexpected type"); | 1300 llvm::report_fatal_error("Unexpected type"); |
1301 case IceType_i8: { | 1301 case IceType_i8: |
1302 Operand *Mask = | |
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); | |
1304 _tst(SrcLoReg, Mask); | |
1305 break; | |
1306 } | |
1307 case IceType_i16: { | 1302 case IceType_i16: { |
1308 Operand *Mask = | 1303 Operand *ShAmtF = |
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); | 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
1310 _tst(SrcLoReg, Mask); | 1305 Legal_Reg | Legal_Flex); |
1311 break; | 1306 Variable *T = makeReg(IceType_i32); |
1312 } | 1307 _lsls(T, SrcLoReg, ShAmtF); |
1308 Context.insert(InstFakeUse::create(Func, T)); | |
1309 } break; | |
1313 case IceType_i32: { | 1310 case IceType_i32: { |
1314 _tst(SrcLoReg, SrcLoReg); | 1311 _tst(SrcLoReg, SrcLoReg); |
1315 break; | 1312 break; |
1316 } | 1313 } |
1317 case IceType_i64: { | 1314 case IceType_i64: { |
1318 Variable *ScratchReg = makeReg(IceType_i32); | 1315 Variable *T = makeReg(IceType_i32); |
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
1320 // ScratchReg isn't going to be used, but we need the side-effect of | 1317 // T isn't going to be used, but we need the side-effect of setting flags |
1321 // setting flags from this operation. | 1318 // from this operation. |
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1319 Context.insert(InstFakeUse::create(Func, T)); |
1323 } | 1320 } |
1324 } | 1321 } |
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1322 InstARM32Label *Label = InstARM32Label::create(Func, this); |
1326 _br(Label, CondARM32::NE); | 1323 _br(Label, CondARM32::NE); |
1327 _trap(); | 1324 _trap(); |
1328 Context.insert(Label); | 1325 Context.insert(Label); |
1329 } | 1326 } |
1330 | 1327 |
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
1332 Operand *Src1, ExtInstr ExtFunc, | 1329 Operand *Src1, ExtInstr ExtFunc, |
(...skipping 64 matching lines...) | |
1397 _orr(T, Src0, Src1RF); | 1394 _orr(T, Src0, Src1RF); |
1398 break; | 1395 break; |
1399 case InstArithmetic::Xor: | 1396 case InstArithmetic::Xor: |
1400 _eor(T, Src0, Src1RF); | 1397 _eor(T, Src0, Src1RF); |
1401 break; | 1398 break; |
1402 } | 1399 } |
1403 _mov(Dest, T); | 1400 _mov(Dest, T); |
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; | 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
1405 } | 1402 } |
1406 | 1403 |
1404 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, | |
1405 Variable *Dest, Operand *Src0, | |
1406 Operand *Src1) { | |
1407 // These helper-call-involved instructions are lowered in this separate | |
1408 // switch. This is because we would otherwise assume that we need to | |
1409 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | |
1410 // helper calls, and such unused/redundant instructions will fail liveness | |
1411 // analysis under -Om1 setting. | |
1412 switch (Op) { | |
1413 default: | |
1414 break; | |
1415 case InstArithmetic::Udiv: | |
1416 case InstArithmetic::Sdiv: | |
1417 case InstArithmetic::Urem: | |
1418 case InstArithmetic::Srem: { | |
1419 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
1420 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
1421 // register, which will hide a constant source operand. Instead, check | |
1422 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
1423 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
1424 if (C64->getValue() == 0) { | |
1425 _trap(); | |
1426 return; | |
1427 } | |
1428 } else { | |
1429 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
1430 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
1431 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
1432 } | |
1433 // Technically, ARM has their own aeabi routines, but we can use the | |
[Review comment] sehr, 2015/11/13 21:56:29:
Grammar: use either "has its" or "have their".
[Reply] John, 2015/11/14 00:00:38:
For a moment I thought this was Jim. :)
Done.
| |
1434 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
1435 // the more standard __moddi3 for rem. | |
1436 const char *HelperName = ""; | |
1437 switch (Op) { | |
1438 default: | |
1439 llvm::report_fatal_error("Should have only matched div ops."); | |
1440 break; | |
1441 case InstArithmetic::Udiv: | |
1442 HelperName = H_udiv_i64; | |
1443 break; | |
1444 case InstArithmetic::Sdiv: | |
1445 HelperName = H_sdiv_i64; | |
1446 break; | |
1447 case InstArithmetic::Urem: | |
1448 HelperName = H_urem_i64; | |
1449 break; | |
1450 case InstArithmetic::Srem: | |
1451 HelperName = H_srem_i64; | |
1452 break; | |
1453 } | |
1454 constexpr SizeT MaxSrcs = 2; | |
1455 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
1456 Call->addArg(Src0); | |
1457 Call->addArg(Src1); | |
1458 lowerCall(Call); | |
1459 return; | |
1460 } | |
1461 } | |
1462 | |
1463 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1464 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1465 Variable *Src0RLo = nullptr; | |
1466 Variable *Src0RHi = nullptr; | |
1467 // Src0Hi is not always used got Shl, and Src0Lo is not always used for Lhsr. | |
[Review comment] Jim Stichnoth, 2015/11/16 13:56:10:
s/got/for/ ? Also s/Lhsr/Lshr/.
| |
1468 if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) { | |
1469 Src0RLo = legalizeToReg(loOperand(Src0)); | |
1470 } | |
1471 if (Op != InstArithmetic::Shl) { | |
1472 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1473 } | |
1474 Operand *Src1Lo = loOperand(Src1); | |
1475 Operand *Src1Hi = hiOperand(Src1); | |
1476 Variable *T_Lo = makeReg(DestLo->getType()); | |
1477 Variable *T_Hi = makeReg(DestHi->getType()); | |
1478 | |
1479 switch (Op) { | |
1480 case InstArithmetic::_num: | |
1481 llvm::report_fatal_error("Unknown arithmetic operator"); | |
1482 return; | |
1483 case InstArithmetic::Add: | |
1484 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1485 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1486 _adds(T_Lo, Src0RLo, Src1Lo); | |
1487 _mov(DestLo, T_Lo); | |
1488 _adc(T_Hi, Src0RHi, Src1Hi); | |
1489 _mov(DestHi, T_Hi); | |
1490 return; | |
1491 case InstArithmetic::And: | |
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1494 _and(T_Lo, Src0RLo, Src1Lo); | |
1495 _mov(DestLo, T_Lo); | |
1496 _and(T_Hi, Src0RHi, Src1Hi); | |
1497 _mov(DestHi, T_Hi); | |
1498 return; | |
1499 case InstArithmetic::Or: | |
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1502 _orr(T_Lo, Src0RLo, Src1Lo); | |
1503 _mov(DestLo, T_Lo); | |
1504 _orr(T_Hi, Src0RHi, Src1Hi); | |
1505 _mov(DestHi, T_Hi); | |
1506 return; | |
1507 case InstArithmetic::Xor: | |
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1510 _eor(T_Lo, Src0RLo, Src1Lo); | |
1511 _mov(DestLo, T_Lo); | |
1512 _eor(T_Hi, Src0RHi, Src1Hi); | |
1513 _mov(DestHi, T_Hi); | |
1514 return; | |
1515 case InstArithmetic::Sub: | |
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1518 _subs(T_Lo, Src0RLo, Src1Lo); | |
1519 _mov(DestLo, T_Lo); | |
1520 _sbc(T_Hi, Src0RHi, Src1Hi); | |
1521 _mov(DestHi, T_Hi); | |
1522 return; | |
1523 case InstArithmetic::Mul: { | |
1524 // GCC 4.8 does: | |
1525 // a=b*c ==> | |
1526 // t_acc =(mul) (b.lo * c.hi) | |
1527 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
1528 // t.hi,t.lo =(umull) b.lo * c.lo | |
1529 // t.hi += t_acc | |
1530 // a.lo = t.lo | |
1531 // a.hi = t.hi | |
1532 // | |
1533 // LLVM does: | |
1534 // t.hi,t.lo =(umull) b.lo * c.lo | |
1535 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
1536 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
1537 // a.lo = t.lo | |
1538 // a.hi = t.hi | |
1539 // | |
1540 // LLVM's lowering has fewer instructions, but more register pressure: | |
1541 // t.lo is live from beginning to end, while GCC delays the two-dest | |
1542 // instruction till the end, and kills c.hi immediately. | |
1543 Variable *T_Acc = makeReg(IceType_i32); | |
1544 Variable *T_Acc1 = makeReg(IceType_i32); | |
1545 Variable *T_Hi1 = makeReg(IceType_i32); | |
1546 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1547 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
1548 _mul(T_Acc, Src0RLo, Src1RHi); | |
1549 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
1550 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
1551 _add(T_Hi, T_Hi1, T_Acc1); | |
1552 _mov(DestLo, T_Lo); | |
1553 _mov(DestHi, T_Hi); | |
1554 return; | |
1555 } | |
1556 case InstArithmetic::Shl: { | |
1557 assert(Src0RLo != nullptr); | |
1558 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | |
1559 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
1560 const int32_t ShAmtImm = C->getValue() & 0x3F; | |
1561 if (ShAmtImm == 0) { | |
1562 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1563 _mov(DestLo, Src0RLo); | |
1564 _mov(DestHi, Src0RHi); | |
1565 return; | |
1566 } | |
1567 | |
1568 if (ShAmtImm >= 32) { | |
1569 if (ShAmtImm == 32) { | |
1570 _mov(DestHi, Src0RLo); | |
1571 } else { | |
1572 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
1573 Legal_Reg | Legal_Flex); | |
1574 _lsl(T_Hi, Src0RLo, ShAmtOp); | |
1575 _mov(DestHi, T_Hi); | |
1576 } | |
1577 | |
1578 Operand *_0 = | |
1579 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | |
1580 _mov(T_Lo, _0); | |
1581 _mov(DestLo, T_Lo); | |
1582 return; | |
1583 } | |
1584 | |
1585 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1586 Operand *ShAmtOp = | |
1587 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
1588 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
1589 Legal_Reg | Legal_Flex); | |
1590 _lsl(T_Hi, Src0RHi, ShAmtOp); | |
1591 _orr(T_Hi, T_Hi, | |
1592 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | |
1593 OperandARM32::LSR, ComplShAmtOp)); | |
1594 _mov(DestHi, T_Hi); | |
1595 | |
1596 _lsl(T_Lo, Src0RLo, ShAmtOp); | |
1597 _mov(DestLo, T_Lo); | |
1598 return; | |
1599 } | |
1600 | |
1601 // a=b<<c ==> | |
1602 // pnacl-llc does: | |
1603 // mov t_b.lo, b.lo | |
1604 // mov t_b.hi, b.hi | |
1605 // mov t_c.lo, c.lo | |
1606 // rsb T0, t_c.lo, #32 | |
1607 // lsr T1, t_b.lo, T0 | |
1608 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
1609 // sub T2, t_c.lo, #32 | |
1610 // cmp T2, #0 | |
1611 // lslge t_a.hi, t_b.lo, T2 | |
1612 // lsl t_a.lo, t_b.lo, t_c.lo | |
1613 // mov a.lo, t_a.lo | |
1614 // mov a.hi, t_a.hi | |
1615 // | |
1616 // GCC 4.8 does: | |
1617 // sub t_c1, c.lo, #32 | |
1618 // lsl t_hi, b.hi, c.lo | |
1619 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
1620 // rsb t_c2, c.lo, #32 | |
1621 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
1622 // lsl t_lo, b.lo, c.lo | |
1623 // a.lo = t_lo | |
1624 // a.hi = t_hi | |
1625 // | |
1626 // These are incompatible, therefore we mimic pnacl-llc. | |
1627 // Can be strength-reduced for constant-shifts, but we don't do that for | |
1628 // now. | |
1629 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
1630 // ARM, shifts only take the lower 8 bits of the shift register, and | |
1631 // saturate to the range 0-32, so the negative value will saturate to 32. | |
1632 Constant *_32 = Ctx->getConstantInt32(32); | |
1633 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1634 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1635 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1636 Variable *T0 = makeReg(IceType_i32); | |
1637 Variable *T1 = makeReg(IceType_i32); | |
1638 Variable *T2 = makeReg(IceType_i32); | |
1639 Variable *TA_Hi = makeReg(IceType_i32); | |
1640 Variable *TA_Lo = makeReg(IceType_i32); | |
1641 _rsb(T0, Src1RLo, _32); | |
1642 _lsr(T1, Src0RLo, T0); | |
1643 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1644 OperandARM32::LSL, Src1RLo)); | |
1645 _sub(T2, Src1RLo, _32); | |
1646 _cmp(T2, _0); | |
1647 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
1648 _set_dest_redefined(); | |
1649 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
1650 _mov(DestLo, TA_Lo); | |
1651 _mov(DestHi, TA_Hi); | |
1652 return; | |
1653 } | |
1654 case InstArithmetic::Lshr: | |
1655 case InstArithmetic::Ashr: { | |
1656 assert(Src0RHi != nullptr); | |
1657 const bool ASR = Op == InstArithmetic::Ashr; | |
1658 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | |
1659 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
1660 const int32_t ShAmtImm = C->getValue() & 0x3F; | |
1661 if (ShAmtImm == 0) { | |
1662 Src0RLo = legalizeToReg(loOperand(Src0)); | |
1663 _mov(DestLo, Src0RLo); | |
1664 _mov(DestHi, Src0RHi); | |
1665 return; | |
1666 } | |
1667 | |
1668 if (ShAmtImm >= 32) { | |
1669 if (ShAmtImm == 32) { | |
1670 _mov(DestLo, Src0RHi); | |
1671 } else { | |
1672 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
1673 Legal_Reg | Legal_Flex); | |
1674 if (ASR) { | |
1675 _asr(T_Lo, Src0RHi, ShAmtOp); | |
1676 } else { | |
1677 _lsr(T_Lo, Src0RHi, ShAmtOp); | |
1678 } | |
1679 _mov(DestLo, T_Lo); | |
1680 } | |
1681 | |
1682 if (ASR) { | |
1683 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32), | |
1684 Legal_Reg | Legal_Flex); | |
1685 _asr(T_Hi, Src0RHi, _31); | |
1686 } else { | |
1687 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), | |
1688 Legal_Reg | Legal_Flex); | |
1689 _mov(T_Hi, _0); | |
1690 } | |
1691 _mov(DestHi, T_Hi); | |
1692 return; | |
1693 } | |
1694 | |
1695 Src0RLo = legalizeToReg(loOperand(Src0)); | |
1696 Operand *ShAmtOp = | |
1697 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
1698 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
1699 Legal_Reg | Legal_Flex); | |
1700 _lsr(T_Lo, Src0RLo, ShAmtOp); | |
1701 _orr(T_Lo, T_Lo, | |
1702 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1703 OperandARM32::LSL, ComplShAmtOp)); | |
1704 _mov(DestLo, T_Lo); | |
1705 | |
1706 if (ASR) { | |
1707 _asr(T_Hi, Src0RHi, ShAmtOp); | |
1708 } else { | |
1709 _lsr(T_Hi, Src0RHi, ShAmtOp); | |
1710 } | |
1711 _mov(DestHi, T_Hi); | |
1712 return; | |
1713 } | |
1714 | |
1715 // a=b>>c | |
1716 // pnacl-llc does: | |
1717 // mov t_b.lo, b.lo | |
1718 // mov t_b.hi, b.hi | |
1719 // mov t_c.lo, c.lo | |
1720 // lsr T0, t_b.lo, t_c.lo | |
1721 // rsb T1, t_c.lo, #32 | |
1722 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
1723 // sub T2, t_c.lo, #32 | |
1724 // cmp T2, #0 | |
1725 // [al]srge t_a.lo, t_b.hi, T2 | |
1726 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
1727 // mov a.lo, t_a.lo | |
1728 // mov a.hi, t_a.hi | |
1729 // | |
1730 // GCC 4.8 does (lsr): | |
1731 // rsb t_c1, c.lo, #32 | |
1732 // lsr t_lo, b.lo, c.lo | |
1733 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
1734 // sub t_c2, c.lo, #32 | |
1735 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
1736 // lsr t_hi, b.hi, c.lo | |
1737 // mov a.lo, t_lo | |
1738 // mov a.hi, t_hi | |
1739 // | |
1740 // These are incompatible, therefore we mimic pnacl-llc. | |
1741 const bool IsAshr = Op == InstArithmetic::Ashr; | |
1742 Constant *_32 = Ctx->getConstantInt32(32); | |
1743 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1744 Src0RLo = legalizeToReg(loOperand(Src0)); | |
1745 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1746 Variable *T0 = makeReg(IceType_i32); | |
1747 Variable *T1 = makeReg(IceType_i32); | |
1748 Variable *T2 = makeReg(IceType_i32); | |
1749 Variable *TA_Lo = makeReg(IceType_i32); | |
1750 Variable *TA_Hi = makeReg(IceType_i32); | |
1751 _lsr(T0, Src0RLo, Src1RLo); | |
1752 _rsb(T1, Src1RLo, _32); | |
1753 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1754 OperandARM32::LSL, T1)); | |
1755 _sub(T2, Src1RLo, _32); | |
1756 _cmp(T2, _0); | |
1757 if (IsAshr) { | |
1758 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1759 _set_dest_redefined(); | |
1760 _asr(TA_Hi, Src0RHi, Src1RLo); | |
1761 } else { | |
1762 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1763 _set_dest_redefined(); | |
1764 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
1765 } | |
1766 _mov(DestLo, TA_Lo); | |
1767 _mov(DestHi, TA_Hi); | |
1768 return; | |
1769 } | |
1770 case InstArithmetic::Fadd: | |
1771 case InstArithmetic::Fsub: | |
1772 case InstArithmetic::Fmul: | |
1773 case InstArithmetic::Fdiv: | |
1774 case InstArithmetic::Frem: | |
1775 llvm::report_fatal_error("FP instruction with i64 type"); | |
1776 return; | |
1777 case InstArithmetic::Udiv: | |
1778 case InstArithmetic::Sdiv: | |
1779 case InstArithmetic::Urem: | |
1780 case InstArithmetic::Srem: | |
1781 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " | |
1782 "should have already been handled before"); | |
1783 return; | |
1784 } | |
1785 } | |
1786 | |
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1787 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
1408 Variable *Dest = Inst->getDest(); | 1788 Variable *Dest = Inst->getDest(); |
1409 if (Dest->getType() == IceType_i1) { | 1789 if (Dest->getType() == IceType_i1) { |
1410 lowerInt1Arithmetic(Inst); | 1790 lowerInt1Arithmetic(Inst); |
1411 return; | 1791 return; |
1412 } | 1792 } |
1413 | 1793 |
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to | 1794 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible | 1795 // legalize Src0 to flex or Src1 to flex and there is a reversible |
1416 // instruction. E.g., reverse subtract with immediate, register vs register, | 1796 // instruction. E.g., reverse subtract with immediate, register vs register, |
1417 // immediate. | 1797 // immediate. |
1418 // Or it may be the case that the operands aren't swapped, but the bits can | 1798 // Or it may be the case that the operands aren't swapped, but the bits can |
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) | 1799 // be flipped and a different operation applied. E.g., use BIC (bit clear) |
1420 // instead of AND for some masks. | 1800 // instead of AND for some masks. |
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1801 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1802 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
1423 if (Dest->getType() == IceType_i64) { | 1803 if (Dest->getType() == IceType_i64) { |
1424 // These helper-call-involved instructions are lowered in this separate | 1804 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
1425 // switch. This is because we would otherwise assume that we need to | |
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | |
1427 // helper calls, and such unused/redundant instructions will fail liveness | |
1428 // analysis under -Om1 setting. | |
1429 switch (Inst->getOp()) { | |
1430 default: | |
1431 break; | |
1432 case InstArithmetic::Udiv: | |
1433 case InstArithmetic::Sdiv: | |
1434 case InstArithmetic::Urem: | |
1435 case InstArithmetic::Srem: { | |
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
1438 // register, which will hide a constant source operand. Instead, check | |
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
1441 if (C64->getValue() == 0) { | |
1442 _trap(); | |
1443 return; | |
1444 } | |
1445 } else { | |
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
1448 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
1449 } | |
1450 // Technically, ARM has their own aeabi routines, but we can use the | |
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
1452 // the more standard __moddi3 for rem. | |
1453 const char *HelperName = ""; | |
1454 switch (Inst->getOp()) { | |
1455 default: | |
1456 llvm_unreachable("Should have only matched div ops."); | |
1457 break; | |
1458 case InstArithmetic::Udiv: | |
1459 HelperName = H_udiv_i64; | |
1460 break; | |
1461 case InstArithmetic::Sdiv: | |
1462 HelperName = H_sdiv_i64; | |
1463 break; | |
1464 case InstArithmetic::Urem: | |
1465 HelperName = H_urem_i64; | |
1466 break; | |
1467 case InstArithmetic::Srem: | |
1468 HelperName = H_srem_i64; | |
1469 break; | |
1470 } | |
1471 constexpr SizeT MaxSrcs = 2; | |
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
1473 Call->addArg(Src0); | |
1474 Call->addArg(Src1); | |
1475 lowerCall(Call); | |
1476 return; | |
1477 } | |
1478 } | |
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0)); | |
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); | |
1483 Operand *Src1Lo = loOperand(Src1); | |
1484 Operand *Src1Hi = hiOperand(Src1); | |
1485 Variable *T_Lo = makeReg(DestLo->getType()); | |
1486 Variable *T_Hi = makeReg(DestHi->getType()); | |
1487 switch (Inst->getOp()) { | |
1488 case InstArithmetic::_num: | |
1489 llvm_unreachable("Unknown arithmetic operator"); | |
1490 return; | |
1491 case InstArithmetic::Add: | |
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1494 _adds(T_Lo, Src0RLo, Src1Lo); | |
1495 _mov(DestLo, T_Lo); | |
1496 _adc(T_Hi, Src0RHi, Src1Hi); | |
1497 _mov(DestHi, T_Hi); | |
1498 return; | |
1499 case InstArithmetic::And: | |
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1502 _and(T_Lo, Src0RLo, Src1Lo); | |
1503 _mov(DestLo, T_Lo); | |
1504 _and(T_Hi, Src0RHi, Src1Hi); | |
1505 _mov(DestHi, T_Hi); | |
1506 return; | |
1507 case InstArithmetic::Or: | |
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1510 _orr(T_Lo, Src0RLo, Src1Lo); | |
1511 _mov(DestLo, T_Lo); | |
1512 _orr(T_Hi, Src0RHi, Src1Hi); | |
1513 _mov(DestHi, T_Hi); | |
1514 return; | |
1515 case InstArithmetic::Xor: | |
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1518 _eor(T_Lo, Src0RLo, Src1Lo); | |
1519 _mov(DestLo, T_Lo); | |
1520 _eor(T_Hi, Src0RHi, Src1Hi); | |
1521 _mov(DestHi, T_Hi); | |
1522 return; | |
1523 case InstArithmetic::Sub: | |
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
1526 _subs(T_Lo, Src0RLo, Src1Lo); | |
1527 _mov(DestLo, T_Lo); | |
1528 _sbc(T_Hi, Src0RHi, Src1Hi); | |
1529 _mov(DestHi, T_Hi); | |
1530 return; | |
1531 case InstArithmetic::Mul: { | |
1532 // GCC 4.8 does: | |
1533 // a=b*c ==> | |
1534 // t_acc =(mul) (b.lo * c.hi) | |
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
1536 // t.hi,t.lo =(umull) b.lo * c.lo | |
1537 // t.hi += t_acc | |
1538 // a.lo = t.lo | |
1539 // a.hi = t.hi | |
1540 // | |
1541 // LLVM does: | |
1542 // t.hi,t.lo =(umull) b.lo * c.lo | |
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
1545 // a.lo = t.lo | |
1546 // a.hi = t.hi | |
1547 // | |
1548 // LLVM's lowering has fewer instructions, but more register pressure: | |
1549 // t.lo is live from beginning to end, while GCC delays the two-dest | |
1550 // instruction till the end, and kills c.hi immediately. | |
1551 Variable *T_Acc = makeReg(IceType_i32); | |
1552 Variable *T_Acc1 = makeReg(IceType_i32); | |
1553 Variable *T_Hi1 = makeReg(IceType_i32); | |
1554 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1555 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
1556 _mul(T_Acc, Src0RLo, Src1RHi); | |
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
1559 _add(T_Hi, T_Hi1, T_Acc1); | |
1560 _mov(DestLo, T_Lo); | |
1561 _mov(DestHi, T_Hi); | |
1562 return; | |
1563 } | |
1564 case InstArithmetic::Shl: { | |
1565 // a=b<<c ==> | |
1566 // pnacl-llc does: | |
1567 // mov t_b.lo, b.lo | |
1568 // mov t_b.hi, b.hi | |
1569 // mov t_c.lo, c.lo | |
1570 // rsb T0, t_c.lo, #32 | |
1571 // lsr T1, t_b.lo, T0 | |
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
1573 // sub T2, t_c.lo, #32 | |
1574 // cmp T2, #0 | |
1575 // lslge t_a.hi, t_b.lo, T2 | |
1576 // lsl t_a.lo, t_b.lo, t_c.lo | |
1577 // mov a.lo, t_a.lo | |
1578 // mov a.hi, t_a.hi | |
1579 // | |
1580 // GCC 4.8 does: | |
1581 // sub t_c1, c.lo, #32 | |
1582 // lsl t_hi, b.hi, c.lo | |
1583 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
1584 // rsb t_c2, c.lo, #32 | |
1585 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
1586 // lsl t_lo, b.lo, c.lo | |
1587 // a.lo = t_lo | |
1588 // a.hi = t_hi | |
1589 // | |
1590 // These are incompatible, therefore we mimic pnacl-llc. | |
1591 // Can be strength-reduced for constant-shifts, but we don't do that for | |
1592 // now. | |
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
1594 // ARM, shifts only take the lower 8 bits of the shift register, and | |
1595 // saturate to the range 0-32, so the negative value will saturate to 32. | |
1596 Constant *_32 = Ctx->getConstantInt32(32); | |
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1598 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1599 Variable *T0 = makeReg(IceType_i32); | |
1600 Variable *T1 = makeReg(IceType_i32); | |
1601 Variable *T2 = makeReg(IceType_i32); | |
1602 Variable *TA_Hi = makeReg(IceType_i32); | |
1603 Variable *TA_Lo = makeReg(IceType_i32); | |
1604 _rsb(T0, Src1RLo, _32); | |
1605 _lsr(T1, Src0RLo, T0); | |
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1607 OperandARM32::LSL, Src1RLo)); | |
1608 _sub(T2, Src1RLo, _32); | |
1609 _cmp(T2, _0); | |
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
1611 _set_dest_redefined(); | |
1612 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
1613 _mov(DestLo, TA_Lo); | |
1614 _mov(DestHi, TA_Hi); | |
1615 return; | |
1616 } | |
1617 case InstArithmetic::Lshr: | |
1618 case InstArithmetic::Ashr: { | |
1619 // a=b>>c | |
1620 // pnacl-llc does: | |
1621 // mov t_b.lo, b.lo | |
1622 // mov t_b.hi, b.hi | |
1623 // mov t_c.lo, c.lo | |
1624 // lsr T0, t_b.lo, t_c.lo | |
1625 // rsb T1, t_c.lo, #32 | |
1626 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
1627 // sub T2, t_c.lo, #32 | |
1628 // cmp T2, #0 | |
1629 // [al]srge t_a.lo, t_b.hi, T2 | |
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
1631 // mov a.lo, t_a.lo | |
1632 // mov a.hi, t_a.hi | |
1633 // | |
1634 // GCC 4.8 does (lsr): | |
1635 // rsb t_c1, c.lo, #32 | |
1636 // lsr t_lo, b.lo, c.lo | |
1637 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
1638 // sub t_c2, c.lo, #32 | |
1639 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
1640 // lsr t_hi, b.hi, c.lo | |
1641 // mov a.lo, t_lo | |
1642 // mov a.hi, t_hi | |
1643 // | |
1644 // These are incompatible, therefore we mimic pnacl-llc. | |
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | |
1646 Constant *_32 = Ctx->getConstantInt32(32); | |
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
1648 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
1649 Variable *T0 = makeReg(IceType_i32); | |
1650 Variable *T1 = makeReg(IceType_i32); | |
1651 Variable *T2 = makeReg(IceType_i32); | |
1652 Variable *TA_Lo = makeReg(IceType_i32); | |
1653 Variable *TA_Hi = makeReg(IceType_i32); | |
1654 _lsr(T0, Src0RLo, Src1RLo); | |
1655 _rsb(T1, Src1RLo, _32); | |
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
1657 OperandARM32::LSL, T1)); | |
1658 _sub(T2, Src1RLo, _32); | |
1659 _cmp(T2, _0); | |
1660 if (IsAshr) { | |
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1662 _set_dest_redefined(); | |
1663 _asr(TA_Hi, Src0RHi, Src1RLo); | |
1664 } else { | |
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
1666 _set_dest_redefined(); | |
1667 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
1668 } | |
1669 _mov(DestLo, TA_Lo); | |
1670 _mov(DestHi, TA_Hi); | |
1671 return; | |
1672 } | |
1673 case InstArithmetic::Fadd: | |
1674 case InstArithmetic::Fsub: | |
1675 case InstArithmetic::Fmul: | |
1676 case InstArithmetic::Fdiv: | |
1677 case InstArithmetic::Frem: | |
1678 llvm_unreachable("FP instruction with i64 type"); | |
1679 return; | |
1680 case InstArithmetic::Udiv: | |
1681 case InstArithmetic::Sdiv: | |
1682 case InstArithmetic::Urem: | |
1683 case InstArithmetic::Srem: | |
1684 llvm_unreachable("Call-helper-involved instruction for i64 type " | |
1685 "should have already been handled before"); | |
1686 return; | |
1687 } | |
1688 return; | 1805 return; |
1689 } else if (isVectorType(Dest->getType())) { | 1806 } |
1807 | |
1808 if (isVectorType(Dest->getType())) { | |
1690 // Add a fake def to keep liveness consistent in the meantime. | 1809 // Add a fake def to keep liveness consistent in the meantime. |
1691 Variable *T = makeReg(Dest->getType()); | 1810 Variable *T = makeReg(Dest->getType()); |
1692 Context.insert(InstFakeDef::create(Func, T)); | 1811 Context.insert(InstFakeDef::create(Func, T)); |
1693 _mov(Dest, T); | 1812 _mov(Dest, T); |
1694 UnimplementedError(Func->getContext()->getFlags()); | 1813 UnimplementedError(Func->getContext()->getFlags()); |
1695 return; | 1814 return; |
1696 } | 1815 } |
1816 | |
1697 // Dest->getType() is a non-i64 scalar. | 1817 // Dest->getType() is a non-i64 scalar. |
1698 Variable *Src0R = legalizeToReg(Src0); | 1818 Variable *Src0R = legalizeToReg(Src0); |
1699 Variable *T = makeReg(Dest->getType()); | 1819 Variable *T = makeReg(Dest->getType()); |
1820 | |
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect | 1821 // Handle div/rem separately. They require a non-legalized Src1 to inspect |
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more | 1822 // whether or not Src1 is a non-zero constant. Once legalized it is more |
1702 // difficult to determine (constant may be moved to a register). | 1823 // difficult to determine (constant may be moved to a register). |
1703 switch (Inst->getOp()) { | 1824 switch (Inst->getOp()) { |
1704 default: | 1825 default: |
1705 break; | 1826 break; |
1706 case InstArithmetic::Udiv: { | 1827 case InstArithmetic::Udiv: { |
1707 constexpr bool NotRemainder = false; | 1828 constexpr bool NotRemainder = false; |
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 1829 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
1709 H_udiv_i32, NotRemainder); | 1830 H_udiv_i32, NotRemainder); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1766 Variable *Src1R = legalizeToReg(Src1); | 1887 Variable *Src1R = legalizeToReg(Src1); |
1767 _vdiv(T, Src0R, Src1R); | 1888 _vdiv(T, Src0R, Src1R); |
1768 _mov(Dest, T); | 1889 _mov(Dest, T); |
1769 return; | 1890 return; |
1770 } | 1891 } |
1771 } | 1892 } |
1772 | 1893 |
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 1894 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
1774 switch (Inst->getOp()) { | 1895 switch (Inst->getOp()) { |
1775 case InstArithmetic::_num: | 1896 case InstArithmetic::_num: |
1776 llvm_unreachable("Unknown arithmetic operator"); | 1897 llvm::report_fatal_error("Unknown arithmetic operator"); |
1777 return; | 1898 return; |
1778 case InstArithmetic::Add: | 1899 case InstArithmetic::Add: |
1779 _add(T, Src0R, Src1RF); | 1900 _add(T, Src0R, Src1RF); |
1780 _mov(Dest, T); | 1901 _mov(Dest, T); |
1781 return; | 1902 return; |
1782 case InstArithmetic::And: | 1903 case InstArithmetic::And: |
1783 _and(T, Src0R, Src1RF); | 1904 _and(T, Src0R, Src1RF); |
1784 _mov(Dest, T); | 1905 _mov(Dest, T); |
1785 return; | 1906 return; |
1786 case InstArithmetic::Or: | 1907 case InstArithmetic::Or: |
(...skipping 29 matching lines...) Expand all Loading... | |
1816 if (Dest->getType() != IceType_i32) { | 1937 if (Dest->getType() != IceType_i32) { |
1817 _sxt(Src0R, Src0R); | 1938 _sxt(Src0R, Src0R); |
1818 } | 1939 } |
1819 _asr(T, Src0R, Src1RF); | 1940 _asr(T, Src0R, Src1RF); |
1820 _mov(Dest, T); | 1941 _mov(Dest, T); |
1821 return; | 1942 return; |
1822 case InstArithmetic::Udiv: | 1943 case InstArithmetic::Udiv: |
1823 case InstArithmetic::Sdiv: | 1944 case InstArithmetic::Sdiv: |
1824 case InstArithmetic::Urem: | 1945 case InstArithmetic::Urem: |
1825 case InstArithmetic::Srem: | 1946 case InstArithmetic::Srem: |
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); | 1947 llvm::report_fatal_error( |
1948 "Integer div/rem should have been handled earlier."); | |
1827 return; | 1949 return; |
1828 case InstArithmetic::Fadd: | 1950 case InstArithmetic::Fadd: |
1829 case InstArithmetic::Fsub: | 1951 case InstArithmetic::Fsub: |
1830 case InstArithmetic::Fmul: | 1952 case InstArithmetic::Fmul: |
1831 case InstArithmetic::Fdiv: | 1953 case InstArithmetic::Fdiv: |
1832 case InstArithmetic::Frem: | 1954 case InstArithmetic::Frem: |
1833 llvm_unreachable("Floating point arith should have been handled earlier."); | 1955 llvm::report_fatal_error( |
1956 "Floating point arith should have been handled earlier."); | |
1834 return; | 1957 return; |
1835 } | 1958 } |
1836 } | 1959 } |
1837 | 1960 |
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { | 1961 void TargetARM32::lowerAssign(const InstAssign *Inst) { |
1839 Variable *Dest = Inst->getDest(); | 1962 Variable *Dest = Inst->getDest(); |
1840 Operand *Src0 = Inst->getSrc(0); | 1963 Operand *Src0 = Inst->getSrc(0); |
1841 assert(Dest->getType() == Src0->getType()); | 1964 assert(Dest->getType() == Src0->getType()); |
1842 if (Dest->getType() == IceType_i64) { | 1965 if (Dest->getType() == IceType_i64) { |
1843 Src0 = legalizeUndef(Src0); | 1966 Src0 = legalizeUndef(Src0); |
1967 | |
1968 Variable *T_Lo = makeReg(IceType_i32); | |
1969 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 1970 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1848 Variable *T_Lo = makeReg(IceType_i32); | |
1849 Variable *T_Hi = makeReg(IceType_i32); | |
1850 | |
1851 _mov(T_Lo, Src0Lo); | 1971 _mov(T_Lo, Src0Lo); |
1852 _mov(DestLo, T_Lo); | 1972 _mov(DestLo, T_Lo); |
1973 | |
1974 Variable *T_Hi = makeReg(IceType_i32); | |
1975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1976 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
1853 _mov(T_Hi, Src0Hi); | 1977 _mov(T_Hi, Src0Hi); |
1854 _mov(DestHi, T_Hi); | 1978 _mov(DestHi, T_Hi); |
1979 | |
1980 return; | |
1981 } | |
1982 | |
1983 Operand *NewSrc; | |
1984 if (Dest->hasReg()) { | |
1985 // If Dest already has a physical register, then legalize the Src operand | |
1986 // into a Variable with the same register assignment. This especially | |
1987 // helps allow the use of Flex operands. | |
1988 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
1855 } else { | 1989 } else { |
1856 Operand *NewSrc; | 1990 // Dest could be a stack operand. Since we could potentially need to do a |
1857 if (Dest->hasReg()) { | 1991 // Store (and store can only have Register operands), legalize this to a |
1858 // If Dest already has a physical register, then legalize the Src operand | 1992 // register. |
1859 // into a Variable with the same register assignment. This especially | 1993 NewSrc = legalize(Src0, Legal_Reg); |
1860 // helps allow the use of Flex operands. | |
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
1862 } else { | |
1863 // Dest could be a stack operand. Since we could potentially need to do a | |
1864 // Store (and store can only have Register operands), legalize this to a | |
1865 // register. | |
1866 NewSrc = legalize(Src0, Legal_Reg); | |
1867 } | |
1868 if (isVectorType(Dest->getType())) { | |
1869 Variable *SrcR = legalizeToReg(NewSrc); | |
1870 _mov(Dest, SrcR); | |
1871 } else if (isFloatingType(Dest->getType())) { | |
1872 Variable *SrcR = legalizeToReg(NewSrc); | |
1873 _mov(Dest, SrcR); | |
1874 } else { | |
1875 _mov(Dest, NewSrc); | |
1876 } | |
1877 } | 1994 } |
1995 | |
1996 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { | |
1997 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); | |
1998 } | |
1999 _mov(Dest, NewSrc); | |
1878 } | 2000 } |
1879 | 2001 |
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 2002 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 2003 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 2004 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
1883 InstARM32Label *NewShortCircuitLabel = nullptr; | 2005 InstARM32Label *NewShortCircuitLabel = nullptr; |
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 2006 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
1885 | 2007 |
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 2008 const Inst *Producer = BoolComputations.getProducerOf(Boolean); |
1887 | 2009 |
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2573 struct { | 2695 struct { |
2574 CondARM32::Cond CC0; | 2696 CondARM32::Cond CC0; |
2575 CondARM32::Cond CC1; | 2697 CondARM32::Cond CC1; |
2576 } TableFcmp[] = { | 2698 } TableFcmp[] = { |
2577 #define X(val, CC0, CC1) \ | 2699 #define X(val, CC0, CC1) \ |
2578 { CondARM32::CC0, CondARM32::CC1 } \ | 2700 { CondARM32::CC0, CondARM32::CC1 } \ |
2579 , | 2701 , |
2580 FCMPARM32_TABLE | 2702 FCMPARM32_TABLE |
2581 #undef X | 2703 #undef X |
2582 }; | 2704 }; |
2705 | |
sehr
2015/11/13 21:56:29
Is there a more common place for this sort of func
John
2015/11/13 22:00:41
Maybe. If you think it's useful, you could add fro
John
2015/11/14 00:00:38
Oh, I thought this was Jim. He had the same routin
| |
2706 bool isFloatingPointZero(Operand *Src) { | |
2707 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | |
2708 return F32->getValue() == 0.0f; | |
2709 } | |
2710 | |
2711 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | |
2712 return F64->getValue() == 0.0; | |
2713 } | |
2714 | |
2715 return false; | |
2716 } | |
2583 } // end of anonymous namespace | 2717 } // end of anonymous namespace |
2584 | 2718 |
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 2719 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
2586 InstFcmp::FCond Condition = Instr->getCondition(); | 2720 InstFcmp::FCond Condition = Instr->getCondition(); |
2587 switch (Condition) { | 2721 switch (Condition) { |
2588 case InstFcmp::False: | 2722 case InstFcmp::False: |
2589 return CondWhenTrue(CondARM32::kNone); | 2723 return CondWhenTrue(CondARM32::kNone); |
2590 case InstFcmp::True: | 2724 case InstFcmp::True: |
2591 return CondWhenTrue(CondARM32::AL); | 2725 return CondWhenTrue(CondARM32::AL); |
2592 break; | 2726 break; |
2593 default: { | 2727 default: { |
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | 2728 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); | 2729 Operand *Src1 = Instr->getSrc(1); |
2596 _vcmp(Src0R, Src1R); | 2730 if (isFloatingPointZero(Src1)) { |
2731 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); | |
2732 } else { | |
2733 _vcmp(Src0R, legalizeToReg(Src1)); | |
2734 } | |
2597 _vmrs(); | 2735 _vmrs(); |
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2736 assert(Condition < llvm::array_lengthof(TableFcmp)); |
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); | 2737 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
2600 } | 2738 } |
2601 } | 2739 } |
2602 } | 2740 } |
2603 | 2741 |
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | 2742 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
2605 Variable *Dest = Instr->getDest(); | 2743 Variable *Dest = Instr->getDest(); |
2606 if (isVectorType(Dest->getType())) { | 2744 if (isVectorType(Dest->getType())) { |
(...skipping 27 matching lines...) Expand all Loading... | |
2634 } else { | 2772 } else { |
2635 _mov(T, _1, Cond.WhenTrue0); | 2773 _mov(T, _1, Cond.WhenTrue0); |
2636 } | 2774 } |
2637 | 2775 |
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { | 2776 if (Cond.WhenTrue1 != CondARM32::kNone) { |
2639 _mov_redefined(T, _1, Cond.WhenTrue1); | 2777 _mov_redefined(T, _1, Cond.WhenTrue1); |
2640 } | 2778 } |
2641 | 2779 |
2642 _mov(Dest, T); | 2780 _mov(Dest, T); |
2643 } | 2781 } |
2782 TargetARM32::CondWhenTrue | |
2783 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | |
2784 Operand *Src1) { | |
2785 size_t Index = static_cast<size_t>(Condition); | |
2786 assert(Index < llvm::array_lengthof(TableIcmp64)); | |
2644 | 2787 |
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { | 2788 Operand *NonConstOp = nullptr; |
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); | 2789 uint64_t Value; |
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); | 2790 if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
2791 Value = C->getValue(); | |
2792 NonConstOp = Src0; | |
2793 } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) { | |
2794 Value = C->getValue(); | |
2795 NonConstOp = Src1; | |
2796 } | |
2648 | 2797 |
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2798 Variable *Src0RLo, *Src0RHi; |
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2799 Operand *Src1RFLo, *Src1RFHi; |
2800 | |
2801 if (NonConstOp != nullptr) { | |
2802 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
2803 Value == 0) { | |
2804 Variable *T = makeReg(IceType_i32); | |
2805 _orrs(T, legalizeToReg(loOperand(NonConstOp)), | |
2806 legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex)); | |
2807 Context.insert(InstFakeUse::create(Func, T)); | |
2808 return CondWhenTrue(TableIcmp64[Index].C1); | |
2809 } | |
2810 | |
2811 Src0RLo = legalizeToReg(loOperand(NonConstOp)); | |
2812 Src0RHi = legalizeToReg(hiOperand(NonConstOp)); | |
2813 if ((Value >> 32) == (Value & 0xFFFFFFFF)) { | |
2814 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), | |
2815 Legal_Reg | Legal_Flex); | |
2816 Src1RFHi = Src1RFLo; | |
2817 } else { | |
2818 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), | |
2819 Legal_Reg | Legal_Flex); | |
2820 Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF), | |
2821 Legal_Reg | Legal_Flex); | |
2822 } | |
2823 | |
2824 bool UseRsb = false; | |
2825 if (TableIcmp64[Index].Swapped) { | |
2826 UseRsb = NonConstOp == Src0; | |
2827 } else { | |
2828 UseRsb = NonConstOp == Src1; | |
2829 } | |
2830 | |
2831 if (UseRsb) { | |
2832 if (TableIcmp64[Index].IsSigned) { | |
2833 Variable *T = makeReg(IceType_i32); | |
2834 _rsbs(T, Src0RLo, Src1RFLo); | |
2835 Context.insert(InstFakeUse::create(Func, T)); | |
2836 | |
2837 T = makeReg(IceType_i32); | |
2838 _rscs(T, Src0RHi, Src1RFHi); | |
2839 // We need to add a FakeUse here because liveness gets mad at us (Def | |
2840 // without Use.) Note that flag-setting instructions are considered to | |
2841 // have side effects and, therefore, are not DCE'ed. | |
2842 Context.insert(InstFakeUse::create(Func, T)); | |
2843 } else { | |
2844 Variable *T = makeReg(IceType_i32); | |
2845 _rsbs(T, Src0RHi, Src1RFHi); | |
2846 Context.insert(InstFakeUse::create(Func, T)); | |
2847 | |
2848 T = makeReg(IceType_i32); | |
2849 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); | |
2850 Context.insert(InstFakeUse::create(Func, T)); | |
2851 } | |
2852 } else { | |
2853 if (TableIcmp64[Index].IsSigned) { | |
2854 _cmp(Src0RLo, Src1RFLo); | |
2855 Variable *T = makeReg(IceType_i32); | |
2856 _sbcs(T, Src0RHi, Src1RFHi); | |
2857 Context.insert(InstFakeUse::create(Func, T)); | |
2858 } else { | |
2859 _cmp(Src0RHi, Src1RFHi); | |
2860 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
2861 } | |
2862 } | |
2863 | |
2864 return CondWhenTrue(TableIcmp64[Index].C1); | |
2865 } | |
2866 | |
2867 if (TableIcmp64[Index].Swapped) { | |
2868 Src0RLo = legalizeToReg(loOperand(Src1)); | |
2869 Src0RHi = legalizeToReg(hiOperand(Src1)); | |
2870 Src1RFLo = legalizeToReg(loOperand(Src0)); | |
2871 Src1RFHi = legalizeToReg(hiOperand(Src0)); | |
2872 } else { | |
2873 Src0RLo = legalizeToReg(loOperand(Src0)); | |
2874 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
2875 Src1RFLo = legalizeToReg(loOperand(Src1)); | |
2876 Src1RFHi = legalizeToReg(hiOperand(Src1)); | |
2877 } | |
2651 | 2878 |
2652 // a=icmp cond, b, c ==> | 2879 // a=icmp cond, b, c ==> |
2653 // GCC does: | 2880 // GCC does: |
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2881 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2882 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2656 // mov.<C1> t, #1 mov.<C1> t, #1 | 2883 // mov.<C1> t, #1 mov.<C1> t, #1 |
2657 // mov.<C2> t, #0 mov.<C2> t, #0 | 2884 // mov.<C2> t, #0 mov.<C2> t, #0 |
2658 // mov a, t mov a, t | 2885 // mov a, t mov a, t |
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2886 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2660 // is used for signed compares. In some cases, b and c need to be swapped as | 2887 // is used for signed compares. In some cases, b and c need to be swapped as |
(...skipping 10 matching lines...) Expand all Loading... | |
2671 // that's nice in that it's just as short but has fewer dependencies for | 2898 // that's nice in that it's just as short but has fewer dependencies for |
2672 // better ILP at the cost of more registers. | 2899 // better ILP at the cost of more registers. |
2673 // | 2900 // |
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 2901 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one | 2902 // unconditional mov #0, two cmps, two conditional mov #1, and one |
2676 // conditional reg mov. That has few dependencies for good ILP, but is a | 2903 // conditional reg mov. That has few dependencies for good ILP, but is a |
2677 // longer sequence. | 2904 // longer sequence. |
2678 // | 2905 // |
2679 // So, we are going with the GCC version since it's usually better (except | 2906 // So, we are going with the GCC version since it's usually better (except |
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2907 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
2908 if (TableIcmp64[Index].IsSigned) { | |
2909 Variable *ScratchReg = makeReg(IceType_i32); | |
2910 _cmp(Src0RLo, Src1RFLo); | |
2911 _sbcs(ScratchReg, Src0RHi, Src1RFHi); | |
2912 // ScratchReg isn't going to be used, but we need the side-effect of | |
2913 // setting flags from this operation. | |
2914 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
2915 } else { | |
2916 _cmp(Src0RHi, Src1RFHi); | |
2917 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
2918 } | |
2919 return CondWhenTrue(TableIcmp64[Index].C1); | |
2920 } | |
2681 | 2921 |
2682 if (Src0->getType() == IceType_i64) { | 2922 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
2683 InstIcmp::ICond Conditon = Inst->getCondition(); | 2923 assert(Inst->getSrc(0)->getType() != IceType_i1); |
2684 size_t Index = static_cast<size_t>(Conditon); | 2924 assert(Inst->getSrc(1)->getType() != IceType_i1); |
2685 assert(Index < llvm::array_lengthof(TableIcmp64)); | |
2686 Variable *Src0Lo, *Src0Hi; | |
2687 Operand *Src1LoRF, *Src1HiRF; | |
2688 if (TableIcmp64[Index].Swapped) { | |
2689 Src0Lo = legalizeToReg(loOperand(Src1)); | |
2690 Src0Hi = legalizeToReg(hiOperand(Src1)); | |
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | |
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
2693 } else { | |
2694 Src0Lo = legalizeToReg(loOperand(Src0)); | |
2695 Src0Hi = legalizeToReg(hiOperand(Src0)); | |
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
2698 } | |
2699 if (TableIcmp64[Index].IsSigned) { | |
2700 Variable *ScratchReg = makeReg(IceType_i32); | |
2701 _cmp(Src0Lo, Src1LoRF); | |
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | |
2703 // ScratchReg isn't going to be used, but we need the side-effect of | |
2704 // setting flags from this operation. | |
2705 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
2706 } else { | |
2707 _cmp(Src0Hi, Src1HiRF); | |
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | |
2709 } | |
2710 return CondWhenTrue(TableIcmp64[Index].C1); | |
2711 } | |
2712 | 2925 |
2926 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | |
2927 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | |
2928 | |
2929 InstIcmp::ICond Condition = Inst->getCondition(); | |
2713 // a=icmp cond b, c ==> | 2930 // a=icmp cond b, c ==> |
2714 // GCC does: | 2931 // GCC does: |
2715 // <u/s>xtb tb, b | 2932 // <u/s>xtb tb, b |
2716 // <u/s>xtb tc, c | 2933 // <u/s>xtb tc, c |
2717 // cmp tb, tc | 2934 // cmp tb, tc |
2718 // mov.C1 t, #0 | 2935 // mov.C1 t, #0 |
2719 // mov.C2 t, #1 | 2936 // mov.C2 t, #1 |
2720 // mov a, t | 2937 // mov a, t |
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have | 2938 // where the unsigned/sign extension is not needed for 32-bit. They also have |
2722 // special cases for EQ and NE. E.g., for NE: | 2939 // special cases for EQ and NE. E.g., for NE: |
2723 // <extend to tb, tc> | 2940 // <extend to tb, tc> |
2724 // subs t, tb, tc | 2941 // subs t, tb, tc |
2725 // movne t, #1 | 2942 // movne t, #1 |
2726 // mov a, t | 2943 // mov a, t |
2727 // | 2944 // |
2728 // LLVM does: | 2945 // LLVM does: |
2729 // lsl tb, b, #<N> | 2946 // lsl tb, b, #<N> |
2730 // mov t, #0 | 2947 // mov t, #0 |
2731 // cmp tb, c, lsl #<N> | 2948 // cmp tb, c, lsl #<N> |
2732 // mov.<C> t, #1 | 2949 // mov.<C> t, #1 |
2733 // mov a, t | 2950 // mov a, t |
2734 // | 2951 // |
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on | 2952 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 2953 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 2954 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2738 // first. It's not clear to me why that special-casing is needed. | 2955 // first. It's not clear to me why that special-casing is needed. |
2739 // | 2956 // |
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few | 2957 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2741 // dependencies. | 2958 // dependencies. |
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2959 Operand *NonConstOp = nullptr; |
2743 assert(ShiftAmt >= 0); | 2960 int32_t Value; |
2744 Constant *ShiftConst = nullptr; | 2961 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
2745 Variable *Src0R = nullptr; | 2962 Value = C->getValue(); |
2746 if (ShiftAmt) { | 2963 NonConstOp = Src0; |
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2964 } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) { |
2748 Src0R = makeReg(IceType_i32); | 2965 Value = C->getValue(); |
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2966 NonConstOp = Src1; |
2750 } else { | |
2751 Src0R = legalizeToReg(Src0); | |
2752 } | 2967 } |
2753 if (ShiftAmt) { | 2968 |
2969 switch (Src0->getType()) { | |
2970 default: | |
2971 llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); | |
2972 case IceType_i64: | |
2973 return lowerInt64IcmpCond(Condition, Src0, Src1); | |
2974 case IceType_i8: | |
2975 case IceType_i16: { | |
2976 int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); | |
2977 assert(ShAmt >= 0); | |
2978 | |
2979 if (NonConstOp != nullptr) { | |
2980 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
2981 Value == 0) { | |
2982 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); | |
2983 Variable *T = makeReg(IceType_i32); | |
2984 _lsls(T, legalizeToReg(NonConstOp), ShAmtOp); | |
2985 Context.insert(InstFakeUse::create(Func, T)); | |
2986 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
2987 } | |
2988 Variable *ConstR = makeReg(IceType_i32); | |
2989 _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt), | |
2990 Legal_Reg | Legal_Flex)); | |
2991 Operand *NonConstF = OperandARM32FlexReg::create( | |
2992 Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL, | |
2993 Ctx->getConstantInt32(ShAmt)); | |
2994 | |
2995 if (Src1 == NonConstOp) { | |
2996 _cmp(ConstR, NonConstF); | |
2997 } else { | |
2998 Variable *T = makeReg(IceType_i32); | |
2999 _rsbs(T, ConstR, NonConstF); | |
3000 Context.insert(InstFakeUse::create(Func, T)); | |
3001 } | |
3002 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
3003 } | |
3004 | |
3005 Variable *Src0R = makeReg(IceType_i32); | |
3006 Operand *ShAmtF = | |
3007 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); | |
3008 _lsl(Src0R, legalizeToReg(Src0), ShAmtF); | |
3009 | |
2754 Variable *Src1R = legalizeToReg(Src1); | 3010 Variable *Src1R = legalizeToReg(Src1); |
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | 3011 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( |
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | 3012 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); |
2757 _cmp(Src0R, Src1RShifted); | 3013 _cmp(Src0R, Src1F); |
2758 } else { | 3014 return CondWhenTrue(getIcmp32Mapping(Condition)); |
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | |
2760 _cmp(Src0R, Src1RF); | |
2761 } | 3015 } |
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); | 3016 case IceType_i32: { |
3017 if (NonConstOp != nullptr) { | |
3018 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
3019 Value == 0) { | |
3020 Variable *T = makeReg(IceType_i32); | |
3021 Variable *OpR = legalizeToReg(NonConstOp); | |
3022 _orrs(T, OpR, OpR); | |
3023 Context.insert(InstFakeUse::create(Func, T)); | |
3024 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
3025 } | |
3026 | |
3027 Operand *ConstRF = | |
3028 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex); | |
3029 Variable *NonConstR = legalizeToReg(NonConstOp); | |
3030 | |
3031 if (Src0 == NonConstOp) { | |
3032 _cmp(NonConstR, ConstRF); | |
3033 } else { | |
3034 Variable *T = makeReg(IceType_i32); | |
3035 _rsbs(T, NonConstR, ConstRF); | |
3036 Context.insert(InstFakeUse::create(Func, T)); | |
3037 } | |
3038 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
3039 } | |
3040 | |
3041 Variable *Src0R = legalizeToReg(Src0); | |
3042 Variable *Src1R = legalizeToReg(Src1); | |
3043 _cmp(Src0R, Src1R); | |
3044 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
3045 } | |
3046 } | |
2763 } | 3047 } |
2764 | 3048 |
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 3049 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
2766 Variable *Dest = Inst->getDest(); | 3050 Variable *Dest = Inst->getDest(); |
2767 | 3051 |
2768 if (isVectorType(Dest->getType())) { | 3052 if (isVectorType(Dest->getType())) { |
2769 Variable *T = makeReg(Dest->getType()); | 3053 Variable *T = makeReg(Dest->getType()); |
2770 Context.insert(InstFakeDef::create(Func, T)); | 3054 Context.insert(InstFakeDef::create(Func, T)); |
2771 _mov(Dest, T); | 3055 _mov(Dest, T); |
2772 UnimplementedError(Func->getContext()->getFlags()); | 3056 UnimplementedError(Func->getContext()->getFlags()); |
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4247 } | 4531 } |
4248 return Reg; | 4532 return Reg; |
4249 } | 4533 } |
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 4534 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
4251 Variable *Reg = makeReg(Ty, RegNum); | 4535 Variable *Reg = makeReg(Ty, RegNum); |
4252 _movw(Reg, C); | 4536 _movw(Reg, C); |
4253 _movt(Reg, C); | 4537 _movt(Reg, C); |
4254 return Reg; | 4538 return Reg; |
4255 } else { | 4539 } else { |
4256 assert(isScalarFloatingType(Ty)); | 4540 assert(isScalarFloatingType(Ty)); |
4541 uint32_t ModifiedImm; | |
4542 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | |
4543 Variable *T = makeReg(Ty, RegNum); | |
4544 _mov(T, | |
4545 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | |
4546 return T; | |
4547 } | |
4548 | |
4257 // Load floats/doubles from literal pool. | 4549 // Load floats/doubles from literal pool. |
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an | |
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified | |
4260 // immediate constants". Or, for 32-bit floating point numbers, just | |
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG | |
4262 // instead of using a movw/movt pair to get the const-pool address then | |
4263 // loading to SREG. | |
4264 std::string Buffer; | 4550 std::string Buffer; |
4265 llvm::raw_string_ostream StrBuf(Buffer); | 4551 llvm::raw_string_ostream StrBuf(Buffer); |
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 4552 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); | 4553 llvm::cast<Constant>(From)->setShouldBePooled(true); |
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 4554 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
4269 Variable *BaseReg = makeReg(getPointerType()); | 4555 Variable *BaseReg = makeReg(getPointerType()); |
4270 _movw(BaseReg, Offset); | 4556 _movw(BaseReg, Offset); |
4271 _movt(BaseReg, Offset); | 4557 _movt(BaseReg, Offset); |
4272 From = formMemoryOperand(BaseReg, Ty); | 4558 From = formMemoryOperand(BaseReg, Ty); |
4273 return copyToReg(From, RegNum); | 4559 return copyToReg(From, RegNum); |
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 5185 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
4900 // However, for compatibility with current NaCl LLVM, don't claim that. | 5186 // However, for compatibility with current NaCl LLVM, don't claim that. |
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 5187 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
4902 } | 5188 } |
4903 | 5189 |
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 5190 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 5191 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; | 5192 llvm::SmallBitVector TargetARM32::ScratchRegs; |
4907 | 5193 |
4908 } // end of namespace Ice | 5194 } // end of namespace Ice |
OLD | NEW |