Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1279 matching lines...) | |
| 1290 } | 1290 } |
| 1291 _mov(Dest, SP); | 1291 _mov(Dest, SP); |
| 1292 } | 1292 } |
| 1293 | 1293 |
| 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { | 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
| 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) | 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) |
| 1296 return; | 1296 return; |
| 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); | 1297 Variable *SrcLoReg = legalizeToReg(SrcLo); |
| 1298 switch (Ty) { | 1298 switch (Ty) { |
| 1299 default: | 1299 default: |
| 1300 llvm_unreachable("Unexpected type"); | 1300 llvm::report_fatal_error("Unexpected type"); |
| 1301 case IceType_i8: { | 1301 case IceType_i8: |
| 1302 Operand *Mask = | |
| 1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); | |
| 1304 _tst(SrcLoReg, Mask); | |
| 1305 break; | |
| 1306 } | |
| 1307 case IceType_i16: { | 1302 case IceType_i16: { |
| 1308 Operand *Mask = | 1303 Operand *ShAmtF = |
| 1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); | 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
| 1310 _tst(SrcLoReg, Mask); | 1305 Legal_Reg | Legal_Flex); |
| 1311 break; | 1306 Variable *T = makeReg(IceType_i32); |
| 1312 } | 1307 _lsls(T, SrcLoReg, ShAmtF); |
| 1308 Context.insert(InstFakeUse::create(Func, T)); | |
| 1309 } break; | |
| 1313 case IceType_i32: { | 1310 case IceType_i32: { |
| 1314 _tst(SrcLoReg, SrcLoReg); | 1311 _tst(SrcLoReg, SrcLoReg); |
| 1315 break; | 1312 break; |
| 1316 } | 1313 } |
| 1317 case IceType_i64: { | 1314 case IceType_i64: { |
| 1318 Variable *ScratchReg = makeReg(IceType_i32); | 1315 Variable *T = makeReg(IceType_i32); |
| 1319 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
| 1320 // ScratchReg isn't going to be used, but we need the side-effect of | 1317 // T isn't going to be used, but we need the side-effect of setting flags |
| 1321 // setting flags from this operation. | 1318 // from this operation. |
| 1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1319 Context.insert(InstFakeUse::create(Func, T)); |
| 1323 } | 1320 } |
| 1324 } | 1321 } |
| 1325 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1322 InstARM32Label *Label = InstARM32Label::create(Func, this); |
| 1326 _br(Label, CondARM32::NE); | 1323 _br(Label, CondARM32::NE); |
| 1327 _trap(); | 1324 _trap(); |
| 1328 Context.insert(Label); | 1325 Context.insert(Label); |
| 1329 } | 1326 } |
| 1330 | 1327 |
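A quick standalone check of the i16 substitution above: the flag-setting shift `lsls T, SrcLo, #16` tests exactly the same condition as the old `tst SrcLo, #0xFFFF`, without needing the 0xFFFF mask (which is not a valid ARM modified-immediate) legalized into a register. Illustrative sketch only, not Subzero code:

```cpp
// Z-flag equivalence of "lsls T, SrcLo, #16" and "tst SrcLo, #0xFFFF".
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Cases[] = {0, 1, 0xFFFF, 0x10000, 0x1FFFF, 0xFFFF0000};
  for (uint32_t X : Cases) {
    const bool LslsSetsZ = (X << 16) == 0;   // Z after the flag-setting shift
    const bool TstSetsZ = (X & 0xFFFF) == 0; // Z after the masked test
    assert(LslsSetsZ == TstSetsZ);
  }
  return 0;
}
```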
| 1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
| 1332 Operand *Src1, ExtInstr ExtFunc, | 1329 Operand *Src1, ExtInstr ExtFunc, |
| (...skipping 64 matching lines...) | |
| 1397 _orr(T, Src0, Src1RF); | 1394 _orr(T, Src0, Src1RF); |
| 1398 break; | 1395 break; |
| 1399 case InstArithmetic::Xor: | 1396 case InstArithmetic::Xor: |
| 1400 _eor(T, Src0, Src1RF); | 1397 _eor(T, Src0, Src1RF); |
| 1401 break; | 1398 break; |
| 1402 } | 1399 } |
| 1403 _mov(Dest, T); | 1400 _mov(Dest, T); |
| 1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; | 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
| 1405 } | 1402 } |
| 1406 | 1403 |
| 1404 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, | |
| 1405 Variable *Dest, Operand *Src0, | |
| 1406 Operand *Src1) { | |
| 1407 // These helper-call-involved instructions are lowered in this separate | |
| 1408 // switch. This is because we would otherwise assume that we need to | |
| 1409 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | |
| 1410 // helper calls, and such unused/redundant instructions will fail liveness | |
| 1411 // analysis under -Om1 setting. | |
| 1412 switch (Op) { | |
| 1413 default: | |
| 1414 break; | |
| 1415 case InstArithmetic::Udiv: | |
| 1416 case InstArithmetic::Sdiv: | |
| 1417 case InstArithmetic::Urem: | |
| 1418 case InstArithmetic::Srem: { | |
| 1419 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
| 1420 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
| 1421 // register, which will hide a constant source operand. Instead, check | |
| 1422 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
| 1423 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
| 1424 if (C64->getValue() == 0) { | |
| 1425 _trap(); | |
| 1426 return; | |
| 1427 } | |
| 1428 } else { | |
| 1429 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1430 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1431 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
| 1432 } | |
| 1433 // Technically, ARM has their own aeabi routines, but we can use the | |
|
sehr, 2015/11/13 21:56:29: either "has its" or "have their".
John, 2015/11/14 00:00:38: For a moment I thought this was Jim. :) Done.
| |
| 1434 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
| 1435 // the more standard __moddi3 for rem. | |
| 1436 const char *HelperName = ""; | |
| 1437 switch (Op) { | |
| 1438 default: | |
| 1439 llvm::report_fatal_error("Should have only matched div ops."); | |
| 1440 break; | |
| 1441 case InstArithmetic::Udiv: | |
| 1442 HelperName = H_udiv_i64; | |
| 1443 break; | |
| 1444 case InstArithmetic::Sdiv: | |
| 1445 HelperName = H_sdiv_i64; | |
| 1446 break; | |
| 1447 case InstArithmetic::Urem: | |
| 1448 HelperName = H_urem_i64; | |
| 1449 break; | |
| 1450 case InstArithmetic::Srem: | |
| 1451 HelperName = H_srem_i64; | |
| 1452 break; | |
| 1453 } | |
| 1454 constexpr SizeT MaxSrcs = 2; | |
| 1455 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 1456 Call->addArg(Src0); | |
| 1457 Call->addArg(Src1); | |
| 1458 lowerCall(Call); | |
| 1459 return; | |
| 1460 } | |
| 1461 } | |
| 1462 | |
| 1463 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1464 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1465 Variable *Src0RLo = nullptr; | |
| 1466 Variable *Src0RHi = nullptr; | |
| 1467 // Src0Hi is not always used got Shl, and Src0Lo is not always used for Lhsr. | |
|
Jim Stichnoth, 2015/11/16 13:56:10: s/got/for/ ? Lshr
| |
| 1468 if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) { | |
| 1469 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1470 } | |
| 1471 if (Op != InstArithmetic::Shl) { | |
| 1472 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1473 } | |
| 1474 Operand *Src1Lo = loOperand(Src1); | |
| 1475 Operand *Src1Hi = hiOperand(Src1); | |
| 1476 Variable *T_Lo = makeReg(DestLo->getType()); | |
| 1477 Variable *T_Hi = makeReg(DestHi->getType()); | |
| 1478 | |
| 1479 switch (Op) { | |
| 1480 case InstArithmetic::_num: | |
| 1481 llvm::report_fatal_error("Unknown arithmetic operator"); | |
| 1482 return; | |
| 1483 case InstArithmetic::Add: | |
| 1484 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1485 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1486 _adds(T_Lo, Src0RLo, Src1Lo); | |
| 1487 _mov(DestLo, T_Lo); | |
| 1488 _adc(T_Hi, Src0RHi, Src1Hi); | |
| 1489 _mov(DestHi, T_Hi); | |
| 1490 return; | |
| 1491 case InstArithmetic::And: | |
| 1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1494 _and(T_Lo, Src0RLo, Src1Lo); | |
| 1495 _mov(DestLo, T_Lo); | |
| 1496 _and(T_Hi, Src0RHi, Src1Hi); | |
| 1497 _mov(DestHi, T_Hi); | |
| 1498 return; | |
| 1499 case InstArithmetic::Or: | |
| 1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1502 _orr(T_Lo, Src0RLo, Src1Lo); | |
| 1503 _mov(DestLo, T_Lo); | |
| 1504 _orr(T_Hi, Src0RHi, Src1Hi); | |
| 1505 _mov(DestHi, T_Hi); | |
| 1506 return; | |
| 1507 case InstArithmetic::Xor: | |
| 1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1510 _eor(T_Lo, Src0RLo, Src1Lo); | |
| 1511 _mov(DestLo, T_Lo); | |
| 1512 _eor(T_Hi, Src0RHi, Src1Hi); | |
| 1513 _mov(DestHi, T_Hi); | |
| 1514 return; | |
| 1515 case InstArithmetic::Sub: | |
| 1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1518 _subs(T_Lo, Src0RLo, Src1Lo); | |
| 1519 _mov(DestLo, T_Lo); | |
| 1520 _sbc(T_Hi, Src0RHi, Src1Hi); | |
| 1521 _mov(DestHi, T_Hi); | |
| 1522 return; | |
| 1523 case InstArithmetic::Mul: { | |
| 1524 // GCC 4.8 does: | |
| 1525 // a=b*c ==> | |
| 1526 // t_acc =(mul) (b.lo * c.hi) | |
| 1527 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
| 1528 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1529 // t.hi += t_acc | |
| 1530 // a.lo = t.lo | |
| 1531 // a.hi = t.hi | |
| 1532 // | |
| 1533 // LLVM does: | |
| 1534 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1535 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
| 1536 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
| 1537 // a.lo = t.lo | |
| 1538 // a.hi = t.hi | |
| 1539 // | |
| 1540 // LLVM's lowering has fewer instructions, but more register pressure: | |
| 1541 // t.lo is live from beginning to end, while GCC delays the two-dest | |
| 1542 // instruction till the end, and kills c.hi immediately. | |
| 1543 Variable *T_Acc = makeReg(IceType_i32); | |
| 1544 Variable *T_Acc1 = makeReg(IceType_i32); | |
| 1545 Variable *T_Hi1 = makeReg(IceType_i32); | |
| 1546 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1547 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
| 1548 _mul(T_Acc, Src0RLo, Src1RHi); | |
| 1549 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
| 1550 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
| 1551 _add(T_Hi, T_Hi1, T_Acc1); | |
| 1552 _mov(DestLo, T_Lo); | |
| 1553 _mov(DestHi, T_Hi); | |
| 1554 return; | |
| 1555 } | |
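The mul/mla/umull/add sequence above implements the standard 64x64->64 decomposition: the low word comes from the widening multiply of the low halves, and the high word accumulates the two cross products modulo 2^32. A standalone C++ model of that decomposition (illustrative only, not Subzero code):

```cpp
#include <cassert>
#include <cstdint>

// Names mirror the temporaries in the lowering: TAcc/TAcc1 are the cross
// products, Wide is the umull result, THi/TLo are the final halves.
static uint64_t mul64Model(uint32_t BLo, uint32_t BHi, uint32_t CLo,
                           uint32_t CHi) {
  const uint32_t TAcc = BLo * CHi;                                // mul
  const uint32_t TAcc1 = CLo * BHi + TAcc;                        // mla
  const uint64_t Wide = static_cast<uint64_t>(BLo) * CLo;         // umull -> hi:lo
  const uint32_t THi = static_cast<uint32_t>(Wide >> 32) + TAcc1; // add
  const uint32_t TLo = static_cast<uint32_t>(Wide);
  return (static_cast<uint64_t>(THi) << 32) | TLo;
}

int main() {
  const uint64_t B = 0x123456789ABCDEF0ull;
  const uint64_t C = 0xFEDCBA9876543210ull;
  assert(mul64Model(static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32),
                    static_cast<uint32_t>(C), static_cast<uint32_t>(C >> 32)) ==
         B * C);
  return 0;
}
```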
| 1556 case InstArithmetic::Shl: { | |
| 1557 assert(Src0RLo != nullptr); | |
| 1558 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | |
| 1559 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
| 1560 const int32_t ShAmtImm = C->getValue() & 0x3F; | |
| 1561 if (ShAmtImm == 0) { | |
| 1562 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1563 _mov(DestLo, Src0RLo); | |
| 1564 _mov(DestHi, Src0RHi); | |
| 1565 return; | |
| 1566 } | |
| 1567 | |
| 1568 if (ShAmtImm >= 32) { | |
| 1569 if (ShAmtImm == 32) { | |
| 1570 _mov(DestHi, Src0RLo); | |
| 1571 } else { | |
| 1572 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
| 1573 Legal_Reg | Legal_Flex); | |
| 1574 _lsl(T_Hi, Src0RLo, ShAmtOp); | |
| 1575 _mov(DestHi, T_Hi); | |
| 1576 } | |
| 1577 | |
| 1578 Operand *_0 = | |
| 1579 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | |
| 1580 _mov(T_Lo, _0); | |
| 1581 _mov(DestLo, T_Lo); | |
| 1582 return; | |
| 1583 } | |
| 1584 | |
| 1585 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1586 Operand *ShAmtOp = | |
| 1587 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
| 1588 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
| 1589 Legal_Reg | Legal_Flex); | |
| 1590 _lsl(T_Hi, Src0RHi, ShAmtOp); | |
| 1591 _orr(T_Hi, T_Hi, | |
| 1592 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | |
| 1593 OperandARM32::LSR, ComplShAmtOp)); | |
| 1594 _mov(DestHi, T_Hi); | |
| 1595 | |
| 1596 _lsl(T_Lo, Src0RLo, ShAmtOp); | |
| 1597 _mov(DestLo, T_Lo); | |
| 1598 return; | |
| 1599 } | |
| 1600 | |
| 1601 // a=b<<c ==> | |
| 1602 // pnacl-llc does: | |
| 1603 // mov t_b.lo, b.lo | |
| 1604 // mov t_b.hi, b.hi | |
| 1605 // mov t_c.lo, c.lo | |
| 1606 // rsb T0, t_c.lo, #32 | |
| 1607 // lsr T1, t_b.lo, T0 | |
| 1608 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
| 1609 // sub T2, t_c.lo, #32 | |
| 1610 // cmp T2, #0 | |
| 1611 // lslge t_a.hi, t_b.lo, T2 | |
| 1612 // lsl t_a.lo, t_b.lo, t_c.lo | |
| 1613 // mov a.lo, t_a.lo | |
| 1614 // mov a.hi, t_a.hi | |
| 1615 // | |
| 1616 // GCC 4.8 does: | |
| 1617 // sub t_c1, c.lo, #32 | |
| 1618 // lsl t_hi, b.hi, c.lo | |
| 1619 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
| 1620 // rsb t_c2, c.lo, #32 | |
| 1621 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
| 1622 // lsl t_lo, b.lo, c.lo | |
| 1623 // a.lo = t_lo | |
| 1624 // a.hi = t_hi | |
| 1625 // | |
| 1626 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1627 // Can be strength-reduced for constant-shifts, but we don't do that for | |
| 1628 // now. | |
| 1629 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
| 1630 // ARM, shifts only take the lower 8 bits of the shift register, and | |
| 1631 // saturate to the range 0-32, so the negative value will saturate to 32. | |
| 1632 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1633 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1634 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1635 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1636 Variable *T0 = makeReg(IceType_i32); | |
| 1637 Variable *T1 = makeReg(IceType_i32); | |
| 1638 Variable *T2 = makeReg(IceType_i32); | |
| 1639 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1640 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1641 _rsb(T0, Src1RLo, _32); | |
| 1642 _lsr(T1, Src0RLo, T0); | |
| 1643 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1644 OperandARM32::LSL, Src1RLo)); | |
| 1645 _sub(T2, Src1RLo, _32); | |
| 1646 _cmp(T2, _0); | |
| 1647 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
| 1648 _set_dest_redefined(); | |
| 1649 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
| 1650 _mov(DestLo, TA_Lo); | |
| 1651 _mov(DestHi, TA_Hi); | |
| 1652 return; | |
| 1653 } | |
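The correctness of the variable-amount Shl path above hinges on the behavior the comment calls out: ARM register-specified shifts read only the bottom byte of the shift register and yield 0 once the amount reaches 32. A standalone C++ model of the pnacl-llc sequence, assuming shift amounts in [0, 63] (illustrative only, not Subzero code):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t shl64Model(uint32_t Lo, uint32_t Hi, uint32_t C) {
  const uint32_t T0 = 32 - C;                              // rsb T0, c, #32
  const uint32_t T1 = (T0 < 32) ? (Lo >> T0) : 0;          // lsr T1, b.lo, T0
  uint32_t AHi = T1 | ((C < 32) ? (Hi << C) : 0);          // orr a.hi, T1, b.hi, lsl c
  const int32_t T2 = static_cast<int32_t>(C) - 32;         // sub T2, c, #32; cmp T2, #0
  if (T2 >= 0)
    AHi = Lo << T2;                                        // lslge a.hi, b.lo, T2
  const uint32_t ALo = (C < 32) ? (Lo << C) : 0;           // lsl a.lo, b.lo, c
  return (static_cast<uint64_t>(AHi) << 32) | ALo;
}

int main() {
  const uint64_t B = 0x89ABCDEF01234567ull;
  const uint32_t Amounts[] = {0, 1, 5, 31, 32, 33, 40, 63};
  for (uint32_t C : Amounts)
    assert(shl64Model(static_cast<uint32_t>(B), static_cast<uint32_t>(B >> 32),
                      C) == (B << C));
  return 0;
}
```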
| 1654 case InstArithmetic::Lshr: | |
| 1655 case InstArithmetic::Ashr: { | |
| 1656 assert(Src0RHi != nullptr); | |
| 1657 const bool ASR = Op == InstArithmetic::Ashr; | |
| 1658 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { | |
| 1659 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. | |
| 1660 const int32_t ShAmtImm = C->getValue() & 0x3F; | |
| 1661 if (ShAmtImm == 0) { | |
| 1662 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1663 _mov(DestLo, Src0RLo); | |
| 1664 _mov(DestHi, Src0RHi); | |
| 1665 return; | |
| 1666 } | |
| 1667 | |
| 1668 if (ShAmtImm >= 32) { | |
| 1669 if (ShAmtImm == 32) { | |
| 1670 _mov(DestLo, Src0RHi); | |
| 1671 } else { | |
| 1672 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), | |
| 1673 Legal_Reg | Legal_Flex); | |
| 1674 if (ASR) { | |
| 1675 _asr(T_Lo, Src0RHi, ShAmtOp); | |
| 1676 } else { | |
| 1677 _lsr(T_Lo, Src0RHi, ShAmtOp); | |
| 1678 } | |
| 1679 _mov(DestLo, T_Lo); | |
| 1680 } | |
| 1681 | |
| 1682 if (ASR) { | |
| 1683 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32), | |
| 1684 Legal_Reg | Legal_Flex); | |
| 1685 _asr(T_Hi, Src0RHi, _31); | |
| 1686 } else { | |
| 1687 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), | |
| 1688 Legal_Reg | Legal_Flex); | |
| 1689 _mov(T_Hi, _0); | |
| 1690 } | |
| 1691 _mov(DestHi, T_Hi); | |
| 1692 return; | |
| 1693 } | |
| 1694 | |
| 1695 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1696 Operand *ShAmtOp = | |
| 1697 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); | |
| 1698 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), | |
| 1699 Legal_Reg | Legal_Flex); | |
| 1700 _lsr(T_Lo, Src0RLo, ShAmtOp); | |
| 1701 _orr(T_Lo, T_Lo, | |
| 1702 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1703 OperandARM32::LSL, ComplShAmtOp)); | |
| 1704 _mov(DestLo, T_Lo); | |
| 1705 | |
| 1706 if (ASR) { | |
| 1707 _asr(T_Hi, Src0RHi, ShAmtOp); | |
| 1708 } else { | |
| 1709 _lsr(T_Hi, Src0RHi, ShAmtOp); | |
| 1710 } | |
| 1711 _mov(DestHi, T_Hi); | |
| 1712 return; | |
| 1713 } | |
| 1714 | |
| 1715 // a=b>>c | |
| 1716 // pnacl-llc does: | |
| 1717 // mov t_b.lo, b.lo | |
| 1718 // mov t_b.hi, b.hi | |
| 1719 // mov t_c.lo, c.lo | |
| 1720 // lsr T0, t_b.lo, t_c.lo | |
| 1721 // rsb T1, t_c.lo, #32 | |
| 1722 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
| 1723 // sub T2, t_c.lo, #32 | |
| 1724 // cmp T2, #0 | |
| 1725 // [al]srge t_a.lo, t_b.hi, T2 | |
| 1726 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
| 1727 // mov a.lo, t_a.lo | |
| 1728 // mov a.hi, t_a.hi | |
| 1729 // | |
| 1730 // GCC 4.8 does (lsr): | |
| 1731 // rsb t_c1, c.lo, #32 | |
| 1732 // lsr t_lo, b.lo, c.lo | |
| 1733 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
| 1734 // sub t_c2, c.lo, #32 | |
| 1735 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
| 1736 // lsr t_hi, b.hi, c.lo | |
| 1737 // mov a.lo, t_lo | |
| 1738 // mov a.hi, t_hi | |
| 1739 // | |
| 1740 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1741 const bool IsAshr = Op == InstArithmetic::Ashr; | |
| 1742 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1743 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1744 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1745 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1746 Variable *T0 = makeReg(IceType_i32); | |
| 1747 Variable *T1 = makeReg(IceType_i32); | |
| 1748 Variable *T2 = makeReg(IceType_i32); | |
| 1749 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1750 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1751 _lsr(T0, Src0RLo, Src1RLo); | |
| 1752 _rsb(T1, Src1RLo, _32); | |
| 1753 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1754 OperandARM32::LSL, T1)); | |
| 1755 _sub(T2, Src1RLo, _32); | |
| 1756 _cmp(T2, _0); | |
| 1757 if (IsAshr) { | |
| 1758 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1759 _set_dest_redefined(); | |
| 1760 _asr(TA_Hi, Src0RHi, Src1RLo); | |
| 1761 } else { | |
| 1762 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1763 _set_dest_redefined(); | |
| 1764 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
| 1765 } | |
| 1766 _mov(DestLo, TA_Lo); | |
| 1767 _mov(DestHi, TA_Hi); | |
| 1768 return; | |
| 1769 } | |
| 1770 case InstArithmetic::Fadd: | |
| 1771 case InstArithmetic::Fsub: | |
| 1772 case InstArithmetic::Fmul: | |
| 1773 case InstArithmetic::Fdiv: | |
| 1774 case InstArithmetic::Frem: | |
| 1775 llvm::report_fatal_error("FP instruction with i64 type"); | |
| 1776 return; | |
| 1777 case InstArithmetic::Udiv: | |
| 1778 case InstArithmetic::Sdiv: | |
| 1779 case InstArithmetic::Urem: | |
| 1780 case InstArithmetic::Srem: | |
| 1781 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " | |
| 1782 "should have already been handled before"); | |
| 1783 return; | |
| 1784 } | |
| 1785 } | |
| 1786 | |
| 1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1787 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| 1408 Variable *Dest = Inst->getDest(); | 1788 Variable *Dest = Inst->getDest(); |
| 1409 if (Dest->getType() == IceType_i1) { | 1789 if (Dest->getType() == IceType_i1) { |
| 1410 lowerInt1Arithmetic(Inst); | 1790 lowerInt1Arithmetic(Inst); |
| 1411 return; | 1791 return; |
| 1412 } | 1792 } |
| 1413 | 1793 |
| 1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to | 1794 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
| 1415 // legalize Src0 to flex or Src1 to flex and there is a reversible | 1795 // legalize Src0 to flex or Src1 to flex and there is a reversible |
| 1416 // instruction. E.g., reverse subtract with immediate, register vs register, | 1796 // instruction. E.g., reverse subtract with immediate, register vs register, |
| 1417 // immediate. | 1797 // immediate. |
| 1418 // Or it may be the case that the operands aren't swapped, but the bits can | 1798 // Or it may be the case that the operands aren't swapped, but the bits can |
| 1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) | 1799 // be flipped and a different operation applied. E.g., use BIC (bit clear) |
| 1420 // instead of AND for some masks. | 1800 // instead of AND for some masks. |
| 1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1801 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| 1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1802 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| 1423 if (Dest->getType() == IceType_i64) { | 1803 if (Dest->getType() == IceType_i64) { |
| 1424 // These helper-call-involved instructions are lowered in this separate | 1804 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
| 1425 // switch. This is because we would otherwise assume that we need to | |
| 1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with | |
| 1427 // helper calls, and such unused/redundant instructions will fail liveness | |
| 1428 // analysis under -Om1 setting. | |
| 1429 switch (Inst->getOp()) { | |
| 1430 default: | |
| 1431 break; | |
| 1432 case InstArithmetic::Udiv: | |
| 1433 case InstArithmetic::Sdiv: | |
| 1434 case InstArithmetic::Urem: | |
| 1435 case InstArithmetic::Srem: { | |
| 1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to | |
| 1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a | |
| 1438 // register, which will hide a constant source operand. Instead, check | |
| 1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. | |
| 1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | |
| 1441 if (C64->getValue() == 0) { | |
| 1442 _trap(); | |
| 1443 return; | |
| 1444 } | |
| 1445 } else { | |
| 1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
| 1448 div0Check(IceType_i64, Src1Lo, Src1Hi); | |
| 1449 } | |
| 1450 // Technically, ARM has their own aeabi routines, but we can use the | |
| 1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses | |
| 1452 // the more standard __moddi3 for rem. | |
| 1453 const char *HelperName = ""; | |
| 1454 switch (Inst->getOp()) { | |
| 1455 default: | |
| 1456 llvm_unreachable("Should have only matched div ops."); | |
| 1457 break; | |
| 1458 case InstArithmetic::Udiv: | |
| 1459 HelperName = H_udiv_i64; | |
| 1460 break; | |
| 1461 case InstArithmetic::Sdiv: | |
| 1462 HelperName = H_sdiv_i64; | |
| 1463 break; | |
| 1464 case InstArithmetic::Urem: | |
| 1465 HelperName = H_urem_i64; | |
| 1466 break; | |
| 1467 case InstArithmetic::Srem: | |
| 1468 HelperName = H_srem_i64; | |
| 1469 break; | |
| 1470 } | |
| 1471 constexpr SizeT MaxSrcs = 2; | |
| 1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); | |
| 1473 Call->addArg(Src0); | |
| 1474 Call->addArg(Src1); | |
| 1475 lowerCall(Call); | |
| 1476 return; | |
| 1477 } | |
| 1478 } | |
| 1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 1483 Operand *Src1Lo = loOperand(Src1); | |
| 1484 Operand *Src1Hi = hiOperand(Src1); | |
| 1485 Variable *T_Lo = makeReg(DestLo->getType()); | |
| 1486 Variable *T_Hi = makeReg(DestHi->getType()); | |
| 1487 switch (Inst->getOp()) { | |
| 1488 case InstArithmetic::_num: | |
| 1489 llvm_unreachable("Unknown arithmetic operator"); | |
| 1490 return; | |
| 1491 case InstArithmetic::Add: | |
| 1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1494 _adds(T_Lo, Src0RLo, Src1Lo); | |
| 1495 _mov(DestLo, T_Lo); | |
| 1496 _adc(T_Hi, Src0RHi, Src1Hi); | |
| 1497 _mov(DestHi, T_Hi); | |
| 1498 return; | |
| 1499 case InstArithmetic::And: | |
| 1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1502 _and(T_Lo, Src0RLo, Src1Lo); | |
| 1503 _mov(DestLo, T_Lo); | |
| 1504 _and(T_Hi, Src0RHi, Src1Hi); | |
| 1505 _mov(DestHi, T_Hi); | |
| 1506 return; | |
| 1507 case InstArithmetic::Or: | |
| 1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1510 _orr(T_Lo, Src0RLo, Src1Lo); | |
| 1511 _mov(DestLo, T_Lo); | |
| 1512 _orr(T_Hi, Src0RHi, Src1Hi); | |
| 1513 _mov(DestHi, T_Hi); | |
| 1514 return; | |
| 1515 case InstArithmetic::Xor: | |
| 1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1518 _eor(T_Lo, Src0RLo, Src1Lo); | |
| 1519 _mov(DestLo, T_Lo); | |
| 1520 _eor(T_Hi, Src0RHi, Src1Hi); | |
| 1521 _mov(DestHi, T_Hi); | |
| 1522 return; | |
| 1523 case InstArithmetic::Sub: | |
| 1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); | |
| 1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); | |
| 1526 _subs(T_Lo, Src0RLo, Src1Lo); | |
| 1527 _mov(DestLo, T_Lo); | |
| 1528 _sbc(T_Hi, Src0RHi, Src1Hi); | |
| 1529 _mov(DestHi, T_Hi); | |
| 1530 return; | |
| 1531 case InstArithmetic::Mul: { | |
| 1532 // GCC 4.8 does: | |
| 1533 // a=b*c ==> | |
| 1534 // t_acc =(mul) (b.lo * c.hi) | |
| 1535 // t_acc =(mla) (c.lo * b.hi) + t_acc | |
| 1536 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1537 // t.hi += t_acc | |
| 1538 // a.lo = t.lo | |
| 1539 // a.hi = t.hi | |
| 1540 // | |
| 1541 // LLVM does: | |
| 1542 // t.hi,t.lo =(umull) b.lo * c.lo | |
| 1543 // t.hi =(mla) (b.lo * c.hi) + t.hi | |
| 1544 // t.hi =(mla) (b.hi * c.lo) + t.hi | |
| 1545 // a.lo = t.lo | |
| 1546 // a.hi = t.hi | |
| 1547 // | |
| 1548 // LLVM's lowering has fewer instructions, but more register pressure: | |
| 1549 // t.lo is live from beginning to end, while GCC delays the two-dest | |
| 1550 // instruction till the end, and kills c.hi immediately. | |
| 1551 Variable *T_Acc = makeReg(IceType_i32); | |
| 1552 Variable *T_Acc1 = makeReg(IceType_i32); | |
| 1553 Variable *T_Hi1 = makeReg(IceType_i32); | |
| 1554 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1555 Variable *Src1RHi = legalizeToReg(Src1Hi); | |
| 1556 _mul(T_Acc, Src0RLo, Src1RHi); | |
| 1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); | |
| 1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); | |
| 1559 _add(T_Hi, T_Hi1, T_Acc1); | |
| 1560 _mov(DestLo, T_Lo); | |
| 1561 _mov(DestHi, T_Hi); | |
| 1562 return; | |
| 1563 } | |
| 1564 case InstArithmetic::Shl: { | |
| 1565 // a=b<<c ==> | |
| 1566 // pnacl-llc does: | |
| 1567 // mov t_b.lo, b.lo | |
| 1568 // mov t_b.hi, b.hi | |
| 1569 // mov t_c.lo, c.lo | |
| 1570 // rsb T0, t_c.lo, #32 | |
| 1571 // lsr T1, t_b.lo, T0 | |
| 1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo | |
| 1573 // sub T2, t_c.lo, #32 | |
| 1574 // cmp T2, #0 | |
| 1575 // lslge t_a.hi, t_b.lo, T2 | |
| 1576 // lsl t_a.lo, t_b.lo, t_c.lo | |
| 1577 // mov a.lo, t_a.lo | |
| 1578 // mov a.hi, t_a.hi | |
| 1579 // | |
| 1580 // GCC 4.8 does: | |
| 1581 // sub t_c1, c.lo, #32 | |
| 1582 // lsl t_hi, b.hi, c.lo | |
| 1583 // orr t_hi, t_hi, b.lo, lsl t_c1 | |
| 1584 // rsb t_c2, c.lo, #32 | |
| 1585 // orr t_hi, t_hi, b.lo, lsr t_c2 | |
| 1586 // lsl t_lo, b.lo, c.lo | |
| 1587 // a.lo = t_lo | |
| 1588 // a.hi = t_hi | |
| 1589 // | |
| 1590 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1591 // Can be strength-reduced for constant-shifts, but we don't do that for | |
| 1592 // now. | |
| 1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On | |
| 1594 // ARM, shifts only take the lower 8 bits of the shift register, and | |
| 1595 // saturate to the range 0-32, so the negative value will saturate to 32. | |
| 1596 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1597 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1598 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1599 Variable *T0 = makeReg(IceType_i32); | |
| 1600 Variable *T1 = makeReg(IceType_i32); | |
| 1601 Variable *T2 = makeReg(IceType_i32); | |
| 1602 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1603 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1604 _rsb(T0, Src1RLo, _32); | |
| 1605 _lsr(T1, Src0RLo, T0); | |
| 1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1607 OperandARM32::LSL, Src1RLo)); | |
| 1608 _sub(T2, Src1RLo, _32); | |
| 1609 _cmp(T2, _0); | |
| 1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); | |
| 1611 _set_dest_redefined(); | |
| 1612 _lsl(TA_Lo, Src0RLo, Src1RLo); | |
| 1613 _mov(DestLo, TA_Lo); | |
| 1614 _mov(DestHi, TA_Hi); | |
| 1615 return; | |
| 1616 } | |
| 1617 case InstArithmetic::Lshr: | |
| 1618 case InstArithmetic::Ashr: { | |
| 1619 // a=b>>c | |
| 1620 // pnacl-llc does: | |
| 1621 // mov t_b.lo, b.lo | |
| 1622 // mov t_b.hi, b.hi | |
| 1623 // mov t_c.lo, c.lo | |
| 1624 // lsr T0, t_b.lo, t_c.lo | |
| 1625 // rsb T1, t_c.lo, #32 | |
| 1626 // orr t_a.lo, T0, t_b.hi, lsl T1 | |
| 1627 // sub T2, t_c.lo, #32 | |
| 1628 // cmp T2, #0 | |
| 1629 // [al]srge t_a.lo, t_b.hi, T2 | |
| 1630 // [al]sr t_a.hi, t_b.hi, t_c.lo | |
| 1631 // mov a.lo, t_a.lo | |
| 1632 // mov a.hi, t_a.hi | |
| 1633 // | |
| 1634 // GCC 4.8 does (lsr): | |
| 1635 // rsb t_c1, c.lo, #32 | |
| 1636 // lsr t_lo, b.lo, c.lo | |
| 1637 // orr t_lo, t_lo, b.hi, lsl t_c1 | |
| 1638 // sub t_c2, c.lo, #32 | |
| 1639 // orr t_lo, t_lo, b.hi, lsr t_c2 | |
| 1640 // lsr t_hi, b.hi, c.lo | |
| 1641 // mov a.lo, t_lo | |
| 1642 // mov a.hi, t_hi | |
| 1643 // | |
| 1644 // These are incompatible, therefore we mimic pnacl-llc. | |
| 1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | |
| 1646 Constant *_32 = Ctx->getConstantInt32(32); | |
| 1647 Constant *_0 = Ctx->getConstantZero(IceType_i32); | |
| 1648 Variable *Src1RLo = legalizeToReg(Src1Lo); | |
| 1649 Variable *T0 = makeReg(IceType_i32); | |
| 1650 Variable *T1 = makeReg(IceType_i32); | |
| 1651 Variable *T2 = makeReg(IceType_i32); | |
| 1652 Variable *TA_Lo = makeReg(IceType_i32); | |
| 1653 Variable *TA_Hi = makeReg(IceType_i32); | |
| 1654 _lsr(T0, Src0RLo, Src1RLo); | |
| 1655 _rsb(T1, Src1RLo, _32); | |
| 1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | |
| 1657 OperandARM32::LSL, T1)); | |
| 1658 _sub(T2, Src1RLo, _32); | |
| 1659 _cmp(T2, _0); | |
| 1660 if (IsAshr) { | |
| 1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1662 _set_dest_redefined(); | |
| 1663 _asr(TA_Hi, Src0RHi, Src1RLo); | |
| 1664 } else { | |
| 1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); | |
| 1666 _set_dest_redefined(); | |
| 1667 _lsr(TA_Hi, Src0RHi, Src1RLo); | |
| 1668 } | |
| 1669 _mov(DestLo, TA_Lo); | |
| 1670 _mov(DestHi, TA_Hi); | |
| 1671 return; | |
| 1672 } | |
| 1673 case InstArithmetic::Fadd: | |
| 1674 case InstArithmetic::Fsub: | |
| 1675 case InstArithmetic::Fmul: | |
| 1676 case InstArithmetic::Fdiv: | |
| 1677 case InstArithmetic::Frem: | |
| 1678 llvm_unreachable("FP instruction with i64 type"); | |
| 1679 return; | |
| 1680 case InstArithmetic::Udiv: | |
| 1681 case InstArithmetic::Sdiv: | |
| 1682 case InstArithmetic::Urem: | |
| 1683 case InstArithmetic::Srem: | |
| 1684 llvm_unreachable("Call-helper-involved instruction for i64 type " | |
| 1685 "should have already been handled before"); | |
| 1686 return; | |
| 1687 } | |
| 1688 return; | 1805 return; |
| 1689 } else if (isVectorType(Dest->getType())) { | 1806 } |
| 1807 | |
| 1808 if (isVectorType(Dest->getType())) { | |
| 1690 // Add a fake def to keep liveness consistent in the meantime. | 1809 // Add a fake def to keep liveness consistent in the meantime. |
| 1691 Variable *T = makeReg(Dest->getType()); | 1810 Variable *T = makeReg(Dest->getType()); |
| 1692 Context.insert(InstFakeDef::create(Func, T)); | 1811 Context.insert(InstFakeDef::create(Func, T)); |
| 1693 _mov(Dest, T); | 1812 _mov(Dest, T); |
| 1694 UnimplementedError(Func->getContext()->getFlags()); | 1813 UnimplementedError(Func->getContext()->getFlags()); |
| 1695 return; | 1814 return; |
| 1696 } | 1815 } |
| 1816 | |
| 1697 // Dest->getType() is a non-i64 scalar. | 1817 // Dest->getType() is a non-i64 scalar. |
| 1698 Variable *Src0R = legalizeToReg(Src0); | 1818 Variable *Src0R = legalizeToReg(Src0); |
| 1699 Variable *T = makeReg(Dest->getType()); | 1819 Variable *T = makeReg(Dest->getType()); |
| 1820 | |
| 1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect | 1821 // Handle div/rem separately. They require a non-legalized Src1 to inspect |
| 1701 // whether or not Src1 is a non-zero constant. Once legalized it is more | 1822 // whether or not Src1 is a non-zero constant. Once legalized it is more |
| 1702 // difficult to determine (constant may be moved to a register). | 1823 // difficult to determine (constant may be moved to a register). |
| 1703 switch (Inst->getOp()) { | 1824 switch (Inst->getOp()) { |
| 1704 default: | 1825 default: |
| 1705 break; | 1826 break; |
| 1706 case InstArithmetic::Udiv: { | 1827 case InstArithmetic::Udiv: { |
| 1707 constexpr bool NotRemainder = false; | 1828 constexpr bool NotRemainder = false; |
| 1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, | 1829 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
| 1709 H_udiv_i32, NotRemainder); | 1830 H_udiv_i32, NotRemainder); |
| (...skipping 56 matching lines...) | |
| 1766 Variable *Src1R = legalizeToReg(Src1); | 1887 Variable *Src1R = legalizeToReg(Src1); |
| 1767 _vdiv(T, Src0R, Src1R); | 1888 _vdiv(T, Src0R, Src1R); |
| 1768 _mov(Dest, T); | 1889 _mov(Dest, T); |
| 1769 return; | 1890 return; |
| 1770 } | 1891 } |
| 1771 } | 1892 } |
| 1772 | 1893 |
| 1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | 1894 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
| 1774 switch (Inst->getOp()) { | 1895 switch (Inst->getOp()) { |
| 1775 case InstArithmetic::_num: | 1896 case InstArithmetic::_num: |
| 1776 llvm_unreachable("Unknown arithmetic operator"); | 1897 llvm::report_fatal_error("Unknown arithmetic operator"); |
| 1777 return; | 1898 return; |
| 1778 case InstArithmetic::Add: | 1899 case InstArithmetic::Add: |
| 1779 _add(T, Src0R, Src1RF); | 1900 _add(T, Src0R, Src1RF); |
| 1780 _mov(Dest, T); | 1901 _mov(Dest, T); |
| 1781 return; | 1902 return; |
| 1782 case InstArithmetic::And: | 1903 case InstArithmetic::And: |
| 1783 _and(T, Src0R, Src1RF); | 1904 _and(T, Src0R, Src1RF); |
| 1784 _mov(Dest, T); | 1905 _mov(Dest, T); |
| 1785 return; | 1906 return; |
| 1786 case InstArithmetic::Or: | 1907 case InstArithmetic::Or: |
| (...skipping 29 matching lines...) | |
| 1816 if (Dest->getType() != IceType_i32) { | 1937 if (Dest->getType() != IceType_i32) { |
| 1817 _sxt(Src0R, Src0R); | 1938 _sxt(Src0R, Src0R); |
| 1818 } | 1939 } |
| 1819 _asr(T, Src0R, Src1RF); | 1940 _asr(T, Src0R, Src1RF); |
| 1820 _mov(Dest, T); | 1941 _mov(Dest, T); |
| 1821 return; | 1942 return; |
| 1822 case InstArithmetic::Udiv: | 1943 case InstArithmetic::Udiv: |
| 1823 case InstArithmetic::Sdiv: | 1944 case InstArithmetic::Sdiv: |
| 1824 case InstArithmetic::Urem: | 1945 case InstArithmetic::Urem: |
| 1825 case InstArithmetic::Srem: | 1946 case InstArithmetic::Srem: |
| 1826 llvm_unreachable("Integer div/rem should have been handled earlier."); | 1947 llvm::report_fatal_error( |
| 1948 "Integer div/rem should have been handled earlier."); | |
| 1827 return; | 1949 return; |
| 1828 case InstArithmetic::Fadd: | 1950 case InstArithmetic::Fadd: |
| 1829 case InstArithmetic::Fsub: | 1951 case InstArithmetic::Fsub: |
| 1830 case InstArithmetic::Fmul: | 1952 case InstArithmetic::Fmul: |
| 1831 case InstArithmetic::Fdiv: | 1953 case InstArithmetic::Fdiv: |
| 1832 case InstArithmetic::Frem: | 1954 case InstArithmetic::Frem: |
| 1833 llvm_unreachable("Floating point arith should have been handled earlier."); | 1955 llvm::report_fatal_error( |
| 1956 "Floating point arith should have been handled earlier."); | |
| 1834 return; | 1957 return; |
| 1835 } | 1958 } |
| 1836 } | 1959 } |
| 1837 | 1960 |
| 1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { | 1961 void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| 1839 Variable *Dest = Inst->getDest(); | 1962 Variable *Dest = Inst->getDest(); |
| 1840 Operand *Src0 = Inst->getSrc(0); | 1963 Operand *Src0 = Inst->getSrc(0); |
| 1841 assert(Dest->getType() == Src0->getType()); | 1964 assert(Dest->getType() == Src0->getType()); |
| 1842 if (Dest->getType() == IceType_i64) { | 1965 if (Dest->getType() == IceType_i64) { |
| 1843 Src0 = legalizeUndef(Src0); | 1966 Src0 = legalizeUndef(Src0); |
| 1967 | |
| 1968 Variable *T_Lo = makeReg(IceType_i32); | |
| 1969 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 1970 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| 1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1848 Variable *T_Lo = makeReg(IceType_i32); | |
| 1849 Variable *T_Hi = makeReg(IceType_i32); | |
| 1850 | |
| 1851 _mov(T_Lo, Src0Lo); | 1971 _mov(T_Lo, Src0Lo); |
| 1852 _mov(DestLo, T_Lo); | 1972 _mov(DestLo, T_Lo); |
| 1973 | |
| 1974 Variable *T_Hi = makeReg(IceType_i32); | |
| 1975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1976 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 1853 _mov(T_Hi, Src0Hi); | 1977 _mov(T_Hi, Src0Hi); |
| 1854 _mov(DestHi, T_Hi); | 1978 _mov(DestHi, T_Hi); |
| 1979 | |
| 1980 return; | |
| 1981 } | |
| 1982 | |
| 1983 Operand *NewSrc; | |
| 1984 if (Dest->hasReg()) { | |
| 1985 // If Dest already has a physical register, then legalize the Src operand | |
| 1986 // into a Variable with the same register assignment. This especially | |
| 1987 // helps allow the use of Flex operands. | |
| 1988 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
| 1855 } else { | 1989 } else { |
| 1856 Operand *NewSrc; | 1990 // Dest could be a stack operand. Since we could potentially need to do a |
| 1857 if (Dest->hasReg()) { | 1991 // Store (and store can only have Register operands), legalize this to a |
| 1858 // If Dest already has a physical register, then legalize the Src operand | 1992 // register. |
| 1859 // into a Variable with the same register assignment. This especially | 1993 NewSrc = legalize(Src0, Legal_Reg); |
| 1860 // helps allow the use of Flex operands. | |
| 1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | |
| 1862 } else { | |
| 1863 // Dest could be a stack operand. Since we could potentially need to do a | |
| 1864 // Store (and store can only have Register operands), legalize this to a | |
| 1865 // register. | |
| 1866 NewSrc = legalize(Src0, Legal_Reg); | |
| 1867 } | |
| 1868 if (isVectorType(Dest->getType())) { | |
| 1869 Variable *SrcR = legalizeToReg(NewSrc); | |
| 1870 _mov(Dest, SrcR); | |
| 1871 } else if (isFloatingType(Dest->getType())) { | |
| 1872 Variable *SrcR = legalizeToReg(NewSrc); | |
| 1873 _mov(Dest, SrcR); | |
| 1874 } else { | |
| 1875 _mov(Dest, NewSrc); | |
| 1876 } | |
| 1877 } | 1994 } |
| 1995 | |
| 1996 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { | |
| 1997 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); | |
| 1998 } | |
| 1999 _mov(Dest, NewSrc); | |
| 1878 } | 2000 } |
| 1879 | 2001 |
| 1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( | 2002 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
| 1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, | 2003 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, |
| 1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { | 2004 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { |
| 1883 InstARM32Label *NewShortCircuitLabel = nullptr; | 2005 InstARM32Label *NewShortCircuitLabel = nullptr; |
| 1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); | 2006 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); |
| 1885 | 2007 |
| 1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); | 2008 const Inst *Producer = BoolComputations.getProducerOf(Boolean); |
| 1887 | 2009 |
| (...skipping 685 matching lines...) | |
| 2573 struct { | 2695 struct { |
| 2574 CondARM32::Cond CC0; | 2696 CondARM32::Cond CC0; |
| 2575 CondARM32::Cond CC1; | 2697 CondARM32::Cond CC1; |
| 2576 } TableFcmp[] = { | 2698 } TableFcmp[] = { |
| 2577 #define X(val, CC0, CC1) \ | 2699 #define X(val, CC0, CC1) \ |
| 2578 { CondARM32::CC0, CondARM32::CC1 } \ | 2700 { CondARM32::CC0, CondARM32::CC1 } \ |
| 2579 , | 2701 , |
| 2580 FCMPARM32_TABLE | 2702 FCMPARM32_TABLE |
| 2581 #undef X | 2703 #undef X |
| 2582 }; | 2704 }; |
| 2705 | |
|
sehr, 2015/11/13 21:56:29: Is there a more common place for this sort of func
John, 2015/11/13 22:00:41: Maybe. If you think it's useful, you could add fro
John, 2015/11/14 00:00:38: Oh, I thought this was Jim. He had the same routin
| |
| 2706 bool isFloatingPointZero(Operand *Src) { | |
| 2707 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | |
| 2708 return F32->getValue() == 0.0f; | |
| 2709 } | |
| 2710 | |
| 2711 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | |
| 2712 return F64->getValue() == 0.0; | |
| 2713 } | |
| 2714 | |
| 2715 return false; | |
| 2716 } | |
| 2583 } // end of anonymous namespace | 2717 } // end of anonymous namespace |
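A small aside on the new isFloatingPointZero helper: the `== 0.0` comparison also returns true for a negative-zero literal, which is still safe to fold into the VFP compare-against-#0.0 form used in lowerFcmpCond below, because IEEE-754 compares -0.0 and +0.0 as equal. A trivial standalone check (not Subzero code):

```cpp
#include <cassert>

int main() {
  assert(-0.0f == 0.0f);   // isFloatingPointZero would accept -0.0f as well
  assert(!(0.1f == 0.0f)); // non-zero literals still take the register path
  return 0;
}
```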
| 2584 | 2718 |
| 2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 2719 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
| 2586 InstFcmp::FCond Condition = Instr->getCondition(); | 2720 InstFcmp::FCond Condition = Instr->getCondition(); |
| 2587 switch (Condition) { | 2721 switch (Condition) { |
| 2588 case InstFcmp::False: | 2722 case InstFcmp::False: |
| 2589 return CondWhenTrue(CondARM32::kNone); | 2723 return CondWhenTrue(CondARM32::kNone); |
| 2590 case InstFcmp::True: | 2724 case InstFcmp::True: |
| 2591 return CondWhenTrue(CondARM32::AL); | 2725 return CondWhenTrue(CondARM32::AL); |
| 2592 break; | 2726 break; |
| 2593 default: { | 2727 default: { |
| 2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); | 2728 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
| 2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); | 2729 Operand *Src1 = Instr->getSrc(1); |
| 2596 _vcmp(Src0R, Src1R); | 2730 if (isFloatingPointZero(Src1)) { |
| 2731 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); | |
| 2732 } else { | |
| 2733 _vcmp(Src0R, legalizeToReg(Src1)); | |
| 2734 } | |
| 2597 _vmrs(); | 2735 _vmrs(); |
| 2598 assert(Condition < llvm::array_lengthof(TableFcmp)); | 2736 assert(Condition < llvm::array_lengthof(TableFcmp)); |
| 2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); | 2737 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
| 2600 } | 2738 } |
| 2601 } | 2739 } |
| 2602 } | 2740 } |
| 2603 | 2741 |
| 2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { | 2742 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
| 2605 Variable *Dest = Instr->getDest(); | 2743 Variable *Dest = Instr->getDest(); |
| 2606 if (isVectorType(Dest->getType())) { | 2744 if (isVectorType(Dest->getType())) { |
| (...skipping 27 matching lines...) | |
| 2634 } else { | 2772 } else { |
| 2635 _mov(T, _1, Cond.WhenTrue0); | 2773 _mov(T, _1, Cond.WhenTrue0); |
| 2636 } | 2774 } |
| 2637 | 2775 |
| 2638 if (Cond.WhenTrue1 != CondARM32::kNone) { | 2776 if (Cond.WhenTrue1 != CondARM32::kNone) { |
| 2639 _mov_redefined(T, _1, Cond.WhenTrue1); | 2777 _mov_redefined(T, _1, Cond.WhenTrue1); |
| 2640 } | 2778 } |
| 2641 | 2779 |
| 2642 _mov(Dest, T); | 2780 _mov(Dest, T); |
| 2643 } | 2781 } |
| 2782 TargetARM32::CondWhenTrue | |
| 2783 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, | |
| 2784 Operand *Src1) { | |
| 2785 size_t Index = static_cast<size_t>(Condition); | |
| 2786 assert(Index < llvm::array_lengthof(TableIcmp64)); | |
| 2644 | 2787 |
| 2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { | 2788 Operand *NonConstOp = nullptr; |
| 2646 assert(Inst->getSrc(0)->getType() != IceType_i1); | 2789 uint64_t Value; |
| 2647 assert(Inst->getSrc(1)->getType() != IceType_i1); | 2790 if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| 2791 Value = C->getValue(); | |
| 2792 NonConstOp = Src0; | |
| 2793 } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) { | |
| 2794 Value = C->getValue(); | |
| 2795 NonConstOp = Src1; | |
| 2796 } | |
| 2648 | 2797 |
| 2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 2798 Variable *Src0RLo, *Src0RHi; |
| 2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 2799 Operand *Src1RFLo, *Src1RFHi; |
| 2800 | |
| 2801 if (NonConstOp != nullptr) { | |
| 2802 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
| 2803 Value == 0) { | |
| 2804 Variable *T = makeReg(IceType_i32); | |
| 2805 _orrs(T, legalizeToReg(loOperand(NonConstOp)), | |
| 2806 legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex)); | |
| 2807 Context.insert(InstFakeUse::create(Func, T)); | |
| 2808 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2809 } | |
| 2810 | |
| 2811 Src0RLo = legalizeToReg(loOperand(NonConstOp)); | |
| 2812 Src0RHi = legalizeToReg(hiOperand(NonConstOp)); | |
| 2813 if ((Value >> 32) == (Value & 0xFFFFFFFF)) { | |
| 2814 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), | |
| 2815 Legal_Reg | Legal_Flex); | |
| 2816 Src1RFHi = Src1RFLo; | |
| 2817 } else { | |
| 2818 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), | |
| 2819 Legal_Reg | Legal_Flex); | |
| 2820 Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF), | |
| 2821 Legal_Reg | Legal_Flex); | |
| 2822 } | |
| 2823 | |
| 2824 bool UseRsb = false; | |
| 2825 if (TableIcmp64[Index].Swapped) { | |
| 2826 UseRsb = NonConstOp == Src0; | |
| 2827 } else { | |
| 2828 UseRsb = NonConstOp == Src1; | |
| 2829 } | |
| 2830 | |
| 2831 if (UseRsb) { | |
| 2832 if (TableIcmp64[Index].IsSigned) { | |
| 2833 Variable *T = makeReg(IceType_i32); | |
| 2834 _rsbs(T, Src0RLo, Src1RFLo); | |
| 2835 Context.insert(InstFakeUse::create(Func, T)); | |
| 2836 | |
| 2837 T = makeReg(IceType_i32); | |
| 2838 _rscs(T, Src0RHi, Src1RFHi); | |
| 2839 // We need to add a FakeUse here because liveness gets mad at us (Def | |
| 2840 // without Use.) Note that flag-setting instructions are considered to | |
| 2841 // have side effects and, therefore, are not DCE'ed. | |
| 2842 Context.insert(InstFakeUse::create(Func, T)); | |
| 2843 } else { | |
| 2844 Variable *T = makeReg(IceType_i32); | |
| 2845 _rsbs(T, Src0RHi, Src1RFHi); | |
| 2846 Context.insert(InstFakeUse::create(Func, T)); | |
| 2847 | |
| 2848 T = makeReg(IceType_i32); | |
| 2849 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 2850 Context.insert(InstFakeUse::create(Func, T)); | |
| 2851 } | |
| 2852 } else { | |
| 2853 if (TableIcmp64[Index].IsSigned) { | |
| 2854 _cmp(Src0RLo, Src1RFLo); | |
| 2855 Variable *T = makeReg(IceType_i32); | |
| 2856 _sbcs(T, Src0RHi, Src1RFHi); | |
| 2857 Context.insert(InstFakeUse::create(Func, T)); | |
| 2858 } else { | |
| 2859 _cmp(Src0RHi, Src1RFHi); | |
| 2860 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 2861 } | |
| 2862 } | |
| 2863 | |
| 2864 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2865 } | |
| 2866 | |
| 2867 if (TableIcmp64[Index].Swapped) { | |
| 2868 Src0RLo = legalizeToReg(loOperand(Src1)); | |
| 2869 Src0RHi = legalizeToReg(hiOperand(Src1)); | |
| 2870 Src1RFLo = legalizeToReg(loOperand(Src0)); | |
| 2871 Src1RFHi = legalizeToReg(hiOperand(Src0)); | |
| 2872 } else { | |
| 2873 Src0RLo = legalizeToReg(loOperand(Src0)); | |
| 2874 Src0RHi = legalizeToReg(hiOperand(Src0)); | |
| 2875 Src1RFLo = legalizeToReg(loOperand(Src1)); | |
| 2876 Src1RFHi = legalizeToReg(hiOperand(Src1)); | |
| 2877 } | |
| 2651 | 2878 |
| 2652 // a=icmp cond, b, c ==> | 2879 // a=icmp cond, b, c ==> |
| 2653 // GCC does: | 2880 // GCC does: |
| 2654 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2881 // cmp b.hi, c.hi or cmp b.lo, c.lo |
| 2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2882 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
| 2656 // mov.<C1> t, #1 mov.<C1> t, #1 | 2883 // mov.<C1> t, #1 mov.<C1> t, #1 |
| 2657 // mov.<C2> t, #0 mov.<C2> t, #0 | 2884 // mov.<C2> t, #0 mov.<C2> t, #0 |
| 2658 // mov a, t mov a, t | 2885 // mov a, t mov a, t |
| 2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2886 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
| 2660 // is used for signed compares. In some cases, b and c need to be swapped as | 2887 // is used for signed compares. In some cases, b and c need to be swapped as |
| (...skipping 10 matching lines...) Expand all Loading... | |
| 2671 // that's nice in that it's just as short but has fewer dependencies for | 2898 // that's nice in that it's just as short but has fewer dependencies for |
| 2672 // better ILP at the cost of more registers. | 2899 // better ILP at the cost of more registers. |
| 2673 // | 2900 // |
| 2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two | 2901 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
| 2675 // unconditional mov #0, two cmps, two conditional mov #1, and one | 2902 // unconditional mov #0, two cmps, two conditional mov #1, and one |
| 2676 // conditional reg mov. That has few dependencies for good ILP, but is a | 2903 // conditional reg mov. That has few dependencies for good ILP, but is a |
| 2677 // longer sequence. | 2904 // longer sequence. |
| 2678 // | 2905 // |
| 2679 // So, we are going with the GCC version since it's usually better (except | 2906 // So, we are going with the GCC version since it's usually better (except |
| 2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2907 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
| 2908 if (TableIcmp64[Index].IsSigned) { | |
| 2909 Variable *ScratchReg = makeReg(IceType_i32); | |
| 2910 _cmp(Src0RLo, Src1RFLo); | |
| 2911 _sbcs(ScratchReg, Src0RHi, Src1RFHi); | |
| 2912 // ScratchReg isn't going to be used, but we need the side-effect of | |
| 2913 // setting flags from this operation. | |
| 2914 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
| 2915 } else { | |
| 2916 _cmp(Src0RHi, Src1RFHi); | |
| 2917 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); | |
| 2918 } | |
| 2919 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2920 } | |
| 2681 | 2921 |
| 2682 if (Src0->getType() == IceType_i64) { | 2922 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| 2683 InstIcmp::ICond Conditon = Inst->getCondition(); | 2923 assert(Inst->getSrc(0)->getType() != IceType_i1); |
| 2684 size_t Index = static_cast<size_t>(Conditon); | 2924 assert(Inst->getSrc(1)->getType() != IceType_i1); |
| 2685 assert(Index < llvm::array_lengthof(TableIcmp64)); | |
| 2686 Variable *Src0Lo, *Src0Hi; | |
| 2687 Operand *Src1LoRF, *Src1HiRF; | |
| 2688 if (TableIcmp64[Index].Swapped) { | |
| 2689 Src0Lo = legalizeToReg(loOperand(Src1)); | |
| 2690 Src0Hi = legalizeToReg(hiOperand(Src1)); | |
| 2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | |
| 2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | |
| 2693 } else { | |
| 2694 Src0Lo = legalizeToReg(loOperand(Src0)); | |
| 2695 Src0Hi = legalizeToReg(hiOperand(Src0)); | |
| 2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | |
| 2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | |
| 2698 } | |
| 2699 if (TableIcmp64[Index].IsSigned) { | |
| 2700 Variable *ScratchReg = makeReg(IceType_i32); | |
| 2701 _cmp(Src0Lo, Src1LoRF); | |
| 2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | |
| 2703 // ScratchReg isn't going to be used, but we need the side-effect of | |
| 2704 // setting flags from this operation. | |
| 2705 Context.insert(InstFakeUse::create(Func, ScratchReg)); | |
| 2706 } else { | |
| 2707 _cmp(Src0Hi, Src1HiRF); | |
| 2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | |
| 2709 } | |
| 2710 return CondWhenTrue(TableIcmp64[Index].C1); | |
| 2711 } | |
| 2712 | 2925 |
| 2926 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | |
| 2927 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | |
| 2928 | |
| 2929 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 2713 // a=icmp cond b, c ==> | 2930 // a=icmp cond b, c ==> |
| 2714 // GCC does: | 2931 // GCC does: |
| 2715 // <u/s>xtb tb, b | 2932 // <u/s>xtb tb, b |
| 2716 // <u/s>xtb tc, c | 2933 // <u/s>xtb tc, c |
| 2717 // cmp tb, tc | 2934 // cmp tb, tc |
| 2718 // mov.C1 t, #0 | 2935 // mov.C1 t, #0 |
| 2719 // mov.C2 t, #1 | 2936 // mov.C2 t, #1 |
| 2720 // mov a, t | 2937 // mov a, t |
| 2721 // where the unsigned/sign extension is not needed for 32-bit. They also have | 2938 // where the unsigned/sign extension is not needed for 32-bit. They also have |
| 2722 // special cases for EQ and NE. E.g., for NE: | 2939 // special cases for EQ and NE. E.g., for NE: |
| 2723 // <extend to tb, tc> | 2940 // <extend to tb, tc> |
| 2724 // subs t, tb, tc | 2941 // subs t, tb, tc |
| 2725 // movne t, #1 | 2942 // movne t, #1 |
| 2726 // mov a, t | 2943 // mov a, t |
| 2727 // | 2944 // |
| 2728 // LLVM does: | 2945 // LLVM does: |
| 2729 // lsl tb, b, #<N> | 2946 // lsl tb, b, #<N> |
| 2730 // mov t, #0 | 2947 // mov t, #0 |
| 2731 // cmp tb, c, lsl #<N> | 2948 // cmp tb, c, lsl #<N> |
| 2732 // mov.<C> t, #1 | 2949 // mov.<C> t, #1 |
| 2733 // mov a, t | 2950 // mov a, t |
| 2734 // | 2951 // |
| 2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on | 2952 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
| 2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For | 2953 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
| 2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb | 2954 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
| 2738 // first. It's not clear to me why that special-casing is needed. | 2955 // first. It's not clear to me why that special-casing is needed. |
| 2739 // | 2956 // |
| 2740 // We'll go with the LLVM way for now, since it's shorter and has just as few | 2957 // We'll go with the LLVM way for now, since it's shorter and has just as few |
| 2741 // dependencies. | 2958 // dependencies. |
| 2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2959 Operand *NonConstOp = nullptr; |
| 2743 assert(ShiftAmt >= 0); | 2960 int32_t Value; |
| 2744 Constant *ShiftConst = nullptr; | 2961 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 2745 Variable *Src0R = nullptr; | 2962 Value = C->getValue(); |
| 2746 if (ShiftAmt) { | 2963 NonConstOp = Src0; |
| 2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2964 } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) { |
| 2748 Src0R = makeReg(IceType_i32); | 2965 Value = C->getValue(); |
| 2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2966 NonConstOp = Src1; |
| 2750 } else { | |
| 2751 Src0R = legalizeToReg(Src0); | |
| 2752 } | 2967 } |
| 2753 if (ShiftAmt) { | 2968 |
| 2969 switch (Src0->getType()) { | |
| 2970 default: | |
| 2971 llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); | |
| 2972 case IceType_i64: | |
| 2973 return lowerInt64IcmpCond(Condition, Src0, Src1); | |
| 2974 case IceType_i8: | |
| 2975 case IceType_i16: { | |
| 2976 int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); | |
| 2977 assert(ShAmt >= 0); | |
| 2978 | |
| 2979 if (NonConstOp != nullptr) { | |
| 2980 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
| 2981 Value == 0) { | |
| 2982 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); | |
| 2983 Variable *T = makeReg(IceType_i32); | |
| 2984 _lsls(T, legalizeToReg(NonConstOp), ShAmtOp); | |
| 2985 Context.insert(InstFakeUse::create(Func, T)); | |
| 2986 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 2987 } | |
| 2988 Variable *ConstR = makeReg(IceType_i32); | |
| 2989 _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt), | |
| 2990 Legal_Reg | Legal_Flex)); | |
| 2991 Operand *NonConstF = OperandARM32FlexReg::create( | |
| 2992 Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL, | |
| 2993 Ctx->getConstantInt32(ShAmt)); | |
| 2994 | |
| 2995 if (Src1 == NonConstOp) { | |
| 2996 _cmp(ConstR, NonConstF); | |
| 2997 } else { | |
| 2998 Variable *T = makeReg(IceType_i32); | |
| 2999 _rsbs(T, ConstR, NonConstF); | |
| 3000 Context.insert(InstFakeUse::create(Func, T)); | |
| 3001 } | |
| 3002 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3003 } | |
| 3004 | |
| 3005 Variable *Src0R = makeReg(IceType_i32); | |
| 3006 Operand *ShAmtF = | |
| 3007 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); | |
| 3008 _lsl(Src0R, legalizeToReg(Src0), ShAmtF); | |
| 3009 | |
| 2754 Variable *Src1R = legalizeToReg(Src1); | 3010 Variable *Src1R = legalizeToReg(Src1); |
| 2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( | 3011 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( |
| 2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); | 3012 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); |
| 2757 _cmp(Src0R, Src1RShifted); | 3013 _cmp(Src0R, Src1F); |
| 2758 } else { | 3014 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); | |
| 2760 _cmp(Src0R, Src1RF); | |
| 2761 } | 3015 } |
| 2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); | 3016 case IceType_i32: { |
| 3017 if (NonConstOp != nullptr) { | |
| 3018 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && | |
| 3019 Value == 0) { | |
| 3020 Variable *T = makeReg(IceType_i32); | |
| 3021 Variable *OpR = legalizeToReg(NonConstOp); | |
| 3022 _orrs(T, OpR, OpR); | |
| 3023 Context.insert(InstFakeUse::create(Func, T)); | |
| 3024 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3025 } | |
| 3026 | |
| 3027 Operand *ConstRF = | |
| 3028 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex); | |
| 3029 Variable *NonConstR = legalizeToReg(NonConstOp); | |
| 3030 | |
| 3031 if (Src0 == NonConstOp) { | |
| 3032 _cmp(NonConstR, ConstRF); | |
| 3033 } else { | |
| 3034 Variable *T = makeReg(IceType_i32); | |
| 3035 _rsbs(T, NonConstR, ConstRF); | |
| 3036 Context.insert(InstFakeUse::create(Func, T)); | |
| 3037 } | |
| 3038 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3039 } | |
| 3040 | |
| 3041 Variable *Src0R = legalizeToReg(Src0); | |
| 3042 Variable *Src1R = legalizeToReg(Src1); | |
| 3043 _cmp(Src0R, Src1R); | |
| 3044 return CondWhenTrue(getIcmp32Mapping(Condition)); | |
| 3045 } | |
| 3046 } | |
| 2763 } | 3047 } |
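As an aside on the comment in lowerIcmpCond above: the left-shift trick is easy to sanity-check outside of Subzero. The standalone C++ sketch below is hypothetical (it is not part of this CL or of Subzero) and assumes two's-complement int32_t, as on ARM; it exhaustively verifies that shifting 8-bit operands into the top byte of a 32-bit register preserves both the signed and the unsigned ordering, which is why a single cmp of the shifted values implements the i8 compare.

  // Standalone sanity check (hypothetical; assumes two's-complement int32_t).
  // Shifting an 8-bit value into bits 31..24 multiplies it by 2^24, so the
  // signed order of the sign-extended values and the unsigned order of the
  // zero-extended values are both preserved by the shifted 32-bit comparison.
  #include <cassert>
  #include <cstdint>

  int main() {
    for (int B = -128; B <= 127; ++B) {
      for (int C = -128; C <= 127; ++C) {
        const uint32_t ShiftedB = static_cast<uint32_t>(B & 0xFF) << 24;
        const uint32_t ShiftedC = static_cast<uint32_t>(C & 0xFF) << 24;
        // Signed i8 comparison matches the signed comparison of shifted values.
        assert((B < C) == (static_cast<int32_t>(ShiftedB) <
                           static_cast<int32_t>(ShiftedC)));
        // Unsigned i8 comparison matches the unsigned comparison of them.
        assert(((B & 0xFF) < (C & 0xFF)) == (ShiftedB < ShiftedC));
      }
    }
    return 0;
  }

The same argument applies to 16-bit operands with a shift of 16, which is what the ShAmt = 32 - getScalarIntBitWidth(...) computation above produces.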
| 2764 | 3048 |
| 2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { | 3049 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| 2766 Variable *Dest = Inst->getDest(); | 3050 Variable *Dest = Inst->getDest(); |
| 2767 | 3051 |
| 2768 if (isVectorType(Dest->getType())) { | 3052 if (isVectorType(Dest->getType())) { |
| 2769 Variable *T = makeReg(Dest->getType()); | 3053 Variable *T = makeReg(Dest->getType()); |
| 2770 Context.insert(InstFakeDef::create(Func, T)); | 3054 Context.insert(InstFakeDef::create(Func, T)); |
| 2771 _mov(Dest, T); | 3055 _mov(Dest, T); |
| 2772 UnimplementedError(Func->getContext()->getFlags()); | 3056 UnimplementedError(Func->getContext()->getFlags()); |
| (...skipping 1474 matching lines...) | |
| 4247 } | 4531 } |
| 4248 return Reg; | 4532 return Reg; |
| 4249 } | 4533 } |
| 4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 4534 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
| 4251 Variable *Reg = makeReg(Ty, RegNum); | 4535 Variable *Reg = makeReg(Ty, RegNum); |
| 4252 _movw(Reg, C); | 4536 _movw(Reg, C); |
| 4253 _movt(Reg, C); | 4537 _movt(Reg, C); |
| 4254 return Reg; | 4538 return Reg; |
| 4255 } else { | 4539 } else { |
| 4256 assert(isScalarFloatingType(Ty)); | 4540 assert(isScalarFloatingType(Ty)); |
| 4541 uint32_t ModifiedImm; | |
| 4542 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | |
| 4543 Variable *T = makeReg(Ty, RegNum); | |
| 4544 _mov(T, | |
| 4545 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | |
| 4546 return T; | |
| 4547 } | |
| 4548 | |
| 4257 // Load floats/doubles from literal pool. | 4549 // Load floats/doubles from literal pool. |
| 4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an | |
| 4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified | |
| 4260 // immediate constants". Or, for 32-bit floating point numbers, just | |
| 4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG | |
| 4262 // instead of using a movw/movt pair to get the const-pool address then | |
| 4263 // loading to SREG. | |
| 4264 std::string Buffer; | 4550 std::string Buffer; |
| 4265 llvm::raw_string_ostream StrBuf(Buffer); | 4551 llvm::raw_string_ostream StrBuf(Buffer); |
| 4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 4552 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
| 4267 llvm::cast<Constant>(From)->setShouldBePooled(true); | 4553 llvm::cast<Constant>(From)->setShouldBePooled(true); |
| 4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 4554 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
| 4269 Variable *BaseReg = makeReg(getPointerType()); | 4555 Variable *BaseReg = makeReg(getPointerType()); |
| 4270 _movw(BaseReg, Offset); | 4556 _movw(BaseReg, Offset); |
| 4271 _movt(BaseReg, Offset); | 4557 _movt(BaseReg, Offset); |
| 4272 From = formMemoryOperand(BaseReg, Ty); | 4558 From = formMemoryOperand(BaseReg, Ty); |
| 4273 return copyToReg(From, RegNum); | 4559 return copyToReg(From, RegNum); |
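The new OperandARM32FlexFpImm::canHoldImm path implements what the removed TODO suggested: per Table A7-18 of the ARM manual ("Floating-point modified immediate constants"), vmov can encode a small family of FP constants directly instead of going through the literal pool. As a rough, hypothetical characterization (an illustration, not the actual Subzero predicate), the encodable values are +/- n * 2^-r with n in [16, 31] and r in [0, 7]:

  // Hypothetical sketch of the "floating-point modified immediate" test from
  // Table A7-18: representable values are +/- n * 2^-r, n in [16,31], r in
  // [0,7]. A real encoder must also produce the 8-bit immediate field; this
  // only answers whether a direct vmov immediate would be possible at all.
  #include <cassert>
  #include <cmath>

  static bool isVfpModifiedImmediate(double Value) {
    const double Magnitude = std::fabs(Value);
    for (int R = 0; R <= 7; ++R)
      for (int N = 16; N <= 31; ++N)
        if (Magnitude == std::ldexp(static_cast<double>(N), -R))
          return true;
    return false;
  }

  int main() {
    assert(isVfpModifiedImmediate(1.0));   // 16 * 2^-4
    assert(isVfpModifiedImmediate(-0.5));  // 16 * 2^-5, sign bit set
    assert(!isVfpModifiedImmediate(0.1));  // not an exact n * 2^-r
    assert(!isVfpModifiedImmediate(0.0));  // +0.0 is not in the encodable set
    return 0;
  }

Anything that fails this kind of check still falls through to the literal-pool path shown above (movw/movt of the pool label address, then a load).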
| (...skipping 625 matching lines...) | |
| 4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 5185 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 4900 // However, for compatibility with current NaCl LLVM, don't claim that. | 5186 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 5187 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 4902 } | 5188 } |
| 4903 | 5189 |
| 4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 5190 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
| 4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 5191 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 4906 llvm::SmallBitVector TargetARM32::ScratchRegs; | 5192 llvm::SmallBitVector TargetARM32::ScratchRegs; |
| 4907 | 5193 |
| 4908 } // end of namespace Ice | 5194 } // end of namespace Ice |