Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(84)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes the lit tests. Double is too precise. Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1279 matching lines...) Expand 10 before | Expand all | Expand 10 after
1290 } 1290 }
1291 _mov(Dest, SP); 1291 _mov(Dest, SP);
1292 } 1292 }
1293 1293
1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 1294 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 1295 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1296 return; 1296 return;
1297 Variable *SrcLoReg = legalizeToReg(SrcLo); 1297 Variable *SrcLoReg = legalizeToReg(SrcLo);
1298 switch (Ty) { 1298 switch (Ty) {
1299 default: 1299 default:
1300 llvm_unreachable("Unexpected type"); 1300 llvm::report_fatal_error("Unexpected type");
1301 case IceType_i8: { 1301 case IceType_i8:
1302 Operand *Mask =
1303 legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
1304 _tst(SrcLoReg, Mask);
1305 break;
1306 }
1307 case IceType_i16: { 1302 case IceType_i16: {
1308 Operand *Mask = 1303 Operand *ShAmtF =
1309 legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); 1304 legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
1310 _tst(SrcLoReg, Mask); 1305 Legal_Reg | Legal_Flex);
1311 break; 1306 Variable *T = makeReg(IceType_i32);
1312 } 1307 _lsls(T, SrcLoReg, ShAmtF);
1308 Context.insert(InstFakeUse::create(Func, T));
1309 } break;
1313 case IceType_i32: { 1310 case IceType_i32: {
1314 _tst(SrcLoReg, SrcLoReg); 1311 _tst(SrcLoReg, SrcLoReg);
1315 break; 1312 break;
1316 } 1313 }
1317 case IceType_i64: { 1314 case IceType_i64: {
1318 Variable *ScratchReg = makeReg(IceType_i32); 1315 Variable *T = makeReg(IceType_i32);
1319 _orrs(ScratchReg, SrcLoReg, SrcHi); 1316 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
1320 // ScratchReg isn't going to be used, but we need the side-effect of 1317 // T isn't going to be used, but we need the side-effect of setting flags
1321 // setting flags from this operation. 1318 // from this operation.
1322 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1319 Context.insert(InstFakeUse::create(Func, T));
1323 } 1320 }
1324 } 1321 }
1325 InstARM32Label *Label = InstARM32Label::create(Func, this); 1322 InstARM32Label *Label = InstARM32Label::create(Func, this);
1326 _br(Label, CondARM32::NE); 1323 _br(Label, CondARM32::NE);
1327 _trap(); 1324 _trap();
1328 Context.insert(Label); 1325 Context.insert(Label);
1329 } 1326 }
1330 1327
1331 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1328 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
1332 Operand *Src1, ExtInstr ExtFunc, 1329 Operand *Src1, ExtInstr ExtFunc,
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1397 _orr(T, Src0, Src1RF); 1394 _orr(T, Src0, Src1RF);
1398 break; 1395 break;
1399 case InstArithmetic::Xor: 1396 case InstArithmetic::Xor:
1400 _eor(T, Src0, Src1RF); 1397 _eor(T, Src0, Src1RF);
1401 break; 1398 break;
1402 } 1399 }
1403 _mov(Dest, T); 1400 _mov(Dest, T);
1404 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; 1401 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
1405 } 1402 }
1406 1403
1404 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
1405 Variable *Dest, Operand *Src0,
1406 Operand *Src1) {
1407 // These helper-call-involved instructions are lowered in this separate
1408 // switch. This is because we would otherwise assume that we need to
1409 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1410 // helper calls, and such unused/redundant instructions will fail liveness
1411 // analysis under -Om1 setting.
1412 switch (Op) {
1413 default:
1414 break;
1415 case InstArithmetic::Udiv:
1416 case InstArithmetic::Sdiv:
1417 case InstArithmetic::Urem:
1418 case InstArithmetic::Srem: {
1419 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1420 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1421 // register, which will hide a constant source operand. Instead, check
1422 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1423 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1424 if (C64->getValue() == 0) {
1425 _trap();
1426 return;
1427 }
1428 } else {
1429 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1430 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1431 div0Check(IceType_i64, Src1Lo, Src1Hi);
1432 }
 1433     // Technically, ARM has its own aeabi routines, but we can use the
sehr 2015/11/13 21:56:29 either "has its" or "have their".
John 2015/11/14 00:00:38 For a moment I thought this was Jim. :) Done.
1434 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1435 // the more standard __moddi3 for rem.
1436 const char *HelperName = "";
1437 switch (Op) {
1438 default:
1439 llvm::report_fatal_error("Should have only matched div ops.");
1440 break;
1441 case InstArithmetic::Udiv:
1442 HelperName = H_udiv_i64;
1443 break;
1444 case InstArithmetic::Sdiv:
1445 HelperName = H_sdiv_i64;
1446 break;
1447 case InstArithmetic::Urem:
1448 HelperName = H_urem_i64;
1449 break;
1450 case InstArithmetic::Srem:
1451 HelperName = H_srem_i64;
1452 break;
1453 }
1454 constexpr SizeT MaxSrcs = 2;
1455 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1456 Call->addArg(Src0);
1457 Call->addArg(Src1);
1458 lowerCall(Call);
1459 return;
1460 }
1461 }
1462
1463 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1464 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1465 Variable *Src0RLo = nullptr;
1466 Variable *Src0RHi = nullptr;
 1467   // Src0Hi is not always used for Shl, and Src0Lo is not always used for Lshr.
Jim Stichnoth 2015/11/16 13:56:10 s/got/for/ ? Lshr
1468 if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) {
1469 Src0RLo = legalizeToReg(loOperand(Src0));
1470 }
1471 if (Op != InstArithmetic::Shl) {
1472 Src0RHi = legalizeToReg(hiOperand(Src0));
1473 }
1474 Operand *Src1Lo = loOperand(Src1);
1475 Operand *Src1Hi = hiOperand(Src1);
1476 Variable *T_Lo = makeReg(DestLo->getType());
1477 Variable *T_Hi = makeReg(DestHi->getType());
1478
1479 switch (Op) {
1480 case InstArithmetic::_num:
1481 llvm::report_fatal_error("Unknown arithmetic operator");
1482 return;
1483 case InstArithmetic::Add:
1484 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1485 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1486 _adds(T_Lo, Src0RLo, Src1Lo);
1487 _mov(DestLo, T_Lo);
1488 _adc(T_Hi, Src0RHi, Src1Hi);
1489 _mov(DestHi, T_Hi);
1490 return;
1491 case InstArithmetic::And:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _and(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _and(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::Or:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _orr(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _orr(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Xor:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _eor(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _eor(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Sub:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _subs(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _sbc(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Mul: {
1524 // GCC 4.8 does:
1525 // a=b*c ==>
1526 // t_acc =(mul) (b.lo * c.hi)
1527 // t_acc =(mla) (c.lo * b.hi) + t_acc
1528 // t.hi,t.lo =(umull) b.lo * c.lo
1529 // t.hi += t_acc
1530 // a.lo = t.lo
1531 // a.hi = t.hi
1532 //
1533 // LLVM does:
1534 // t.hi,t.lo =(umull) b.lo * c.lo
1535 // t.hi =(mla) (b.lo * c.hi) + t.hi
1536 // t.hi =(mla) (b.hi * c.lo) + t.hi
1537 // a.lo = t.lo
1538 // a.hi = t.hi
1539 //
1540 // LLVM's lowering has fewer instructions, but more register pressure:
1541 // t.lo is live from beginning to end, while GCC delays the two-dest
1542 // instruction till the end, and kills c.hi immediately.
1543 Variable *T_Acc = makeReg(IceType_i32);
1544 Variable *T_Acc1 = makeReg(IceType_i32);
1545 Variable *T_Hi1 = makeReg(IceType_i32);
1546 Variable *Src1RLo = legalizeToReg(Src1Lo);
1547 Variable *Src1RHi = legalizeToReg(Src1Hi);
1548 _mul(T_Acc, Src0RLo, Src1RHi);
1549 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1550 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1551 _add(T_Hi, T_Hi1, T_Acc1);
1552 _mov(DestLo, T_Lo);
1553 _mov(DestHi, T_Hi);
1554 return;
1555 }
1556 case InstArithmetic::Shl: {
1557 assert(Src0RLo != nullptr);
1558 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1559 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1560 const int32_t ShAmtImm = C->getValue() & 0x3F;
1561 if (ShAmtImm == 0) {
1562 Src0RHi = legalizeToReg(hiOperand(Src0));
1563 _mov(DestLo, Src0RLo);
1564 _mov(DestHi, Src0RHi);
1565 return;
1566 }
1567
1568 if (ShAmtImm >= 32) {
1569 if (ShAmtImm == 32) {
1570 _mov(DestHi, Src0RLo);
1571 } else {
1572 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1573 Legal_Reg | Legal_Flex);
1574 _lsl(T_Hi, Src0RLo, ShAmtOp);
1575 _mov(DestHi, T_Hi);
1576 }
1577
1578 Operand *_0 =
1579 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
1580 _mov(T_Lo, _0);
1581 _mov(DestLo, T_Lo);
1582 return;
1583 }
1584
1585 Src0RHi = legalizeToReg(hiOperand(Src0));
1586 Operand *ShAmtOp =
1587 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1588 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1589 Legal_Reg | Legal_Flex);
1590 _lsl(T_Hi, Src0RHi, ShAmtOp);
1591 _orr(T_Hi, T_Hi,
1592 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1593 OperandARM32::LSR, ComplShAmtOp));
1594 _mov(DestHi, T_Hi);
1595
1596 _lsl(T_Lo, Src0RLo, ShAmtOp);
1597 _mov(DestLo, T_Lo);
1598 return;
1599 }
1600
1601 // a=b<<c ==>
1602 // pnacl-llc does:
1603 // mov t_b.lo, b.lo
1604 // mov t_b.hi, b.hi
1605 // mov t_c.lo, c.lo
1606 // rsb T0, t_c.lo, #32
1607 // lsr T1, t_b.lo, T0
1608 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1609 // sub T2, t_c.lo, #32
1610 // cmp T2, #0
1611 // lslge t_a.hi, t_b.lo, T2
1612 // lsl t_a.lo, t_b.lo, t_c.lo
1613 // mov a.lo, t_a.lo
1614 // mov a.hi, t_a.hi
1615 //
1616 // GCC 4.8 does:
1617 // sub t_c1, c.lo, #32
1618 // lsl t_hi, b.hi, c.lo
1619 // orr t_hi, t_hi, b.lo, lsl t_c1
1620 // rsb t_c2, c.lo, #32
1621 // orr t_hi, t_hi, b.lo, lsr t_c2
1622 // lsl t_lo, b.lo, c.lo
1623 // a.lo = t_lo
1624 // a.hi = t_hi
1625 //
1626 // These are incompatible, therefore we mimic pnacl-llc.
1627 // Can be strength-reduced for constant-shifts, but we don't do that for
1628 // now.
1629 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1630 // ARM, shifts only take the lower 8 bits of the shift register, and
1631 // saturate to the range 0-32, so the negative value will saturate to 32.
1632 Constant *_32 = Ctx->getConstantInt32(32);
1633 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1634 Src0RHi = legalizeToReg(hiOperand(Src0));
1635 Variable *Src1RLo = legalizeToReg(Src1Lo);
1636 Variable *T0 = makeReg(IceType_i32);
1637 Variable *T1 = makeReg(IceType_i32);
1638 Variable *T2 = makeReg(IceType_i32);
1639 Variable *TA_Hi = makeReg(IceType_i32);
1640 Variable *TA_Lo = makeReg(IceType_i32);
1641 _rsb(T0, Src1RLo, _32);
1642 _lsr(T1, Src0RLo, T0);
1643 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1644 OperandARM32::LSL, Src1RLo));
1645 _sub(T2, Src1RLo, _32);
1646 _cmp(T2, _0);
1647 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1648 _set_dest_redefined();
1649 _lsl(TA_Lo, Src0RLo, Src1RLo);
1650 _mov(DestLo, TA_Lo);
1651 _mov(DestHi, TA_Hi);
1652 return;
1653 }
1654 case InstArithmetic::Lshr:
1655 case InstArithmetic::Ashr: {
1656 assert(Src0RHi != nullptr);
1657 const bool ASR = Op == InstArithmetic::Ashr;
1658 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
1659 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
1660 const int32_t ShAmtImm = C->getValue() & 0x3F;
1661 if (ShAmtImm == 0) {
1662 Src0RLo = legalizeToReg(loOperand(Src0));
1663 _mov(DestLo, Src0RLo);
1664 _mov(DestHi, Src0RHi);
1665 return;
1666 }
1667
1668 if (ShAmtImm >= 32) {
1669 if (ShAmtImm == 32) {
1670 _mov(DestLo, Src0RHi);
1671 } else {
1672 Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
1673 Legal_Reg | Legal_Flex);
1674 if (ASR) {
1675 _asr(T_Lo, Src0RHi, ShAmtOp);
1676 } else {
1677 _lsr(T_Lo, Src0RHi, ShAmtOp);
1678 }
1679 _mov(DestLo, T_Lo);
1680 }
1681
1682 if (ASR) {
1683 Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32),
1684 Legal_Reg | Legal_Flex);
1685 _asr(T_Hi, Src0RHi, _31);
1686 } else {
1687 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
1688 Legal_Reg | Legal_Flex);
1689 _mov(T_Hi, _0);
1690 }
1691 _mov(DestHi, T_Hi);
1692 return;
1693 }
1694
1695 Src0RLo = legalizeToReg(loOperand(Src0));
1696 Operand *ShAmtOp =
1697 legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
1698 Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
1699 Legal_Reg | Legal_Flex);
1700 _lsr(T_Lo, Src0RLo, ShAmtOp);
1701 _orr(T_Lo, T_Lo,
1702 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1703 OperandARM32::LSL, ComplShAmtOp));
1704 _mov(DestLo, T_Lo);
1705
1706 if (ASR) {
1707 _asr(T_Hi, Src0RHi, ShAmtOp);
1708 } else {
1709 _lsr(T_Hi, Src0RHi, ShAmtOp);
1710 }
1711 _mov(DestHi, T_Hi);
1712 return;
1713 }
1714
1715 // a=b>>c
1716 // pnacl-llc does:
1717 // mov t_b.lo, b.lo
1718 // mov t_b.hi, b.hi
1719 // mov t_c.lo, c.lo
1720 // lsr T0, t_b.lo, t_c.lo
1721 // rsb T1, t_c.lo, #32
1722 // orr t_a.lo, T0, t_b.hi, lsl T1
1723 // sub T2, t_c.lo, #32
1724 // cmp T2, #0
1725 // [al]srge t_a.lo, t_b.hi, T2
1726 // [al]sr t_a.hi, t_b.hi, t_c.lo
1727 // mov a.lo, t_a.lo
1728 // mov a.hi, t_a.hi
1729 //
1730 // GCC 4.8 does (lsr):
1731 // rsb t_c1, c.lo, #32
1732 // lsr t_lo, b.lo, c.lo
1733 // orr t_lo, t_lo, b.hi, lsl t_c1
1734 // sub t_c2, c.lo, #32
1735 // orr t_lo, t_lo, b.hi, lsr t_c2
1736 // lsr t_hi, b.hi, c.lo
1737 // mov a.lo, t_lo
1738 // mov a.hi, t_hi
1739 //
1740 // These are incompatible, therefore we mimic pnacl-llc.
1741 const bool IsAshr = Op == InstArithmetic::Ashr;
1742 Constant *_32 = Ctx->getConstantInt32(32);
1743 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1744 Src0RLo = legalizeToReg(loOperand(Src0));
1745 Variable *Src1RLo = legalizeToReg(Src1Lo);
1746 Variable *T0 = makeReg(IceType_i32);
1747 Variable *T1 = makeReg(IceType_i32);
1748 Variable *T2 = makeReg(IceType_i32);
1749 Variable *TA_Lo = makeReg(IceType_i32);
1750 Variable *TA_Hi = makeReg(IceType_i32);
1751 _lsr(T0, Src0RLo, Src1RLo);
1752 _rsb(T1, Src1RLo, _32);
1753 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1754 OperandARM32::LSL, T1));
1755 _sub(T2, Src1RLo, _32);
1756 _cmp(T2, _0);
1757 if (IsAshr) {
1758 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1759 _set_dest_redefined();
1760 _asr(TA_Hi, Src0RHi, Src1RLo);
1761 } else {
1762 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1763 _set_dest_redefined();
1764 _lsr(TA_Hi, Src0RHi, Src1RLo);
1765 }
1766 _mov(DestLo, TA_Lo);
1767 _mov(DestHi, TA_Hi);
1768 return;
1769 }
1770 case InstArithmetic::Fadd:
1771 case InstArithmetic::Fsub:
1772 case InstArithmetic::Fmul:
1773 case InstArithmetic::Fdiv:
1774 case InstArithmetic::Frem:
1775 llvm::report_fatal_error("FP instruction with i64 type");
1776 return;
1777 case InstArithmetic::Udiv:
1778 case InstArithmetic::Sdiv:
1779 case InstArithmetic::Urem:
1780 case InstArithmetic::Srem:
1781 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1782 "should have already been handled before");
1783 return;
1784 }
1785 }
1786
1407 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1787 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1408 Variable *Dest = Inst->getDest(); 1788 Variable *Dest = Inst->getDest();
1409 if (Dest->getType() == IceType_i1) { 1789 if (Dest->getType() == IceType_i1) {
1410 lowerInt1Arithmetic(Inst); 1790 lowerInt1Arithmetic(Inst);
1411 return; 1791 return;
1412 } 1792 }
1413 1793
1414 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to 1794 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1415 // legalize Src0 to flex or Src1 to flex and there is a reversible 1795 // legalize Src0 to flex or Src1 to flex and there is a reversible
1416 // instruction. E.g., reverse subtract with immediate, register vs register, 1796 // instruction. E.g., reverse subtract with immediate, register vs register,
1417 // immediate. 1797 // immediate.
1418 // Or it may be the case that the operands aren't swapped, but the bits can 1798 // Or it may be the case that the operands aren't swapped, but the bits can
1419 // be flipped and a different operation applied. E.g., use BIC (bit clear) 1799 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1420 // instead of AND for some masks. 1800 // instead of AND for some masks.
1421 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1801 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1422 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1802 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1423 if (Dest->getType() == IceType_i64) { 1803 if (Dest->getType() == IceType_i64) {
1424 // These helper-call-involved instructions are lowered in this separate 1804 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1425 // switch. This is because we would otherwise assume that we need to
1426 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1427 // helper calls, and such unused/redundant instructions will fail liveness
1428 // analysis under -Om1 setting.
1429 switch (Inst->getOp()) {
1430 default:
1431 break;
1432 case InstArithmetic::Udiv:
1433 case InstArithmetic::Sdiv:
1434 case InstArithmetic::Urem:
1435 case InstArithmetic::Srem: {
1436 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1437 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1438 // register, which will hide a constant source operand. Instead, check
1439 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1440 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1441 if (C64->getValue() == 0) {
1442 _trap();
1443 return;
1444 }
1445 } else {
1446 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1447 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1448 div0Check(IceType_i64, Src1Lo, Src1Hi);
1449 }
1450 // Technically, ARM has their own aeabi routines, but we can use the
1451 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1452 // the more standard __moddi3 for rem.
1453 const char *HelperName = "";
1454 switch (Inst->getOp()) {
1455 default:
1456 llvm_unreachable("Should have only matched div ops.");
1457 break;
1458 case InstArithmetic::Udiv:
1459 HelperName = H_udiv_i64;
1460 break;
1461 case InstArithmetic::Sdiv:
1462 HelperName = H_sdiv_i64;
1463 break;
1464 case InstArithmetic::Urem:
1465 HelperName = H_urem_i64;
1466 break;
1467 case InstArithmetic::Srem:
1468 HelperName = H_srem_i64;
1469 break;
1470 }
1471 constexpr SizeT MaxSrcs = 2;
1472 InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
1473 Call->addArg(Src0);
1474 Call->addArg(Src1);
1475 lowerCall(Call);
1476 return;
1477 }
1478 }
1479 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1480 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1481 Variable *Src0RLo = legalizeToReg(loOperand(Src0));
1482 Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
1483 Operand *Src1Lo = loOperand(Src1);
1484 Operand *Src1Hi = hiOperand(Src1);
1485 Variable *T_Lo = makeReg(DestLo->getType());
1486 Variable *T_Hi = makeReg(DestHi->getType());
1487 switch (Inst->getOp()) {
1488 case InstArithmetic::_num:
1489 llvm_unreachable("Unknown arithmetic operator");
1490 return;
1491 case InstArithmetic::Add:
1492 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1493 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1494 _adds(T_Lo, Src0RLo, Src1Lo);
1495 _mov(DestLo, T_Lo);
1496 _adc(T_Hi, Src0RHi, Src1Hi);
1497 _mov(DestHi, T_Hi);
1498 return;
1499 case InstArithmetic::And:
1500 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1501 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1502 _and(T_Lo, Src0RLo, Src1Lo);
1503 _mov(DestLo, T_Lo);
1504 _and(T_Hi, Src0RHi, Src1Hi);
1505 _mov(DestHi, T_Hi);
1506 return;
1507 case InstArithmetic::Or:
1508 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1509 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1510 _orr(T_Lo, Src0RLo, Src1Lo);
1511 _mov(DestLo, T_Lo);
1512 _orr(T_Hi, Src0RHi, Src1Hi);
1513 _mov(DestHi, T_Hi);
1514 return;
1515 case InstArithmetic::Xor:
1516 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1517 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1518 _eor(T_Lo, Src0RLo, Src1Lo);
1519 _mov(DestLo, T_Lo);
1520 _eor(T_Hi, Src0RHi, Src1Hi);
1521 _mov(DestHi, T_Hi);
1522 return;
1523 case InstArithmetic::Sub:
1524 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
1525 Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
1526 _subs(T_Lo, Src0RLo, Src1Lo);
1527 _mov(DestLo, T_Lo);
1528 _sbc(T_Hi, Src0RHi, Src1Hi);
1529 _mov(DestHi, T_Hi);
1530 return;
1531 case InstArithmetic::Mul: {
1532 // GCC 4.8 does:
1533 // a=b*c ==>
1534 // t_acc =(mul) (b.lo * c.hi)
1535 // t_acc =(mla) (c.lo * b.hi) + t_acc
1536 // t.hi,t.lo =(umull) b.lo * c.lo
1537 // t.hi += t_acc
1538 // a.lo = t.lo
1539 // a.hi = t.hi
1540 //
1541 // LLVM does:
1542 // t.hi,t.lo =(umull) b.lo * c.lo
1543 // t.hi =(mla) (b.lo * c.hi) + t.hi
1544 // t.hi =(mla) (b.hi * c.lo) + t.hi
1545 // a.lo = t.lo
1546 // a.hi = t.hi
1547 //
1548 // LLVM's lowering has fewer instructions, but more register pressure:
1549 // t.lo is live from beginning to end, while GCC delays the two-dest
1550 // instruction till the end, and kills c.hi immediately.
1551 Variable *T_Acc = makeReg(IceType_i32);
1552 Variable *T_Acc1 = makeReg(IceType_i32);
1553 Variable *T_Hi1 = makeReg(IceType_i32);
1554 Variable *Src1RLo = legalizeToReg(Src1Lo);
1555 Variable *Src1RHi = legalizeToReg(Src1Hi);
1556 _mul(T_Acc, Src0RLo, Src1RHi);
1557 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
1558 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
1559 _add(T_Hi, T_Hi1, T_Acc1);
1560 _mov(DestLo, T_Lo);
1561 _mov(DestHi, T_Hi);
1562 return;
1563 }
1564 case InstArithmetic::Shl: {
1565 // a=b<<c ==>
1566 // pnacl-llc does:
1567 // mov t_b.lo, b.lo
1568 // mov t_b.hi, b.hi
1569 // mov t_c.lo, c.lo
1570 // rsb T0, t_c.lo, #32
1571 // lsr T1, t_b.lo, T0
1572 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
1573 // sub T2, t_c.lo, #32
1574 // cmp T2, #0
1575 // lslge t_a.hi, t_b.lo, T2
1576 // lsl t_a.lo, t_b.lo, t_c.lo
1577 // mov a.lo, t_a.lo
1578 // mov a.hi, t_a.hi
1579 //
1580 // GCC 4.8 does:
1581 // sub t_c1, c.lo, #32
1582 // lsl t_hi, b.hi, c.lo
1583 // orr t_hi, t_hi, b.lo, lsl t_c1
1584 // rsb t_c2, c.lo, #32
1585 // orr t_hi, t_hi, b.lo, lsr t_c2
1586 // lsl t_lo, b.lo, c.lo
1587 // a.lo = t_lo
1588 // a.hi = t_hi
1589 //
1590 // These are incompatible, therefore we mimic pnacl-llc.
1591 // Can be strength-reduced for constant-shifts, but we don't do that for
1592 // now.
1593 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1594 // ARM, shifts only take the lower 8 bits of the shift register, and
1595 // saturate to the range 0-32, so the negative value will saturate to 32.
1596 Constant *_32 = Ctx->getConstantInt32(32);
1597 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1598 Variable *Src1RLo = legalizeToReg(Src1Lo);
1599 Variable *T0 = makeReg(IceType_i32);
1600 Variable *T1 = makeReg(IceType_i32);
1601 Variable *T2 = makeReg(IceType_i32);
1602 Variable *TA_Hi = makeReg(IceType_i32);
1603 Variable *TA_Lo = makeReg(IceType_i32);
1604 _rsb(T0, Src1RLo, _32);
1605 _lsr(T1, Src0RLo, T0);
1606 _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1607 OperandARM32::LSL, Src1RLo));
1608 _sub(T2, Src1RLo, _32);
1609 _cmp(T2, _0);
1610 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
1611 _set_dest_redefined();
1612 _lsl(TA_Lo, Src0RLo, Src1RLo);
1613 _mov(DestLo, TA_Lo);
1614 _mov(DestHi, TA_Hi);
1615 return;
1616 }
1617 case InstArithmetic::Lshr:
1618 case InstArithmetic::Ashr: {
1619 // a=b>>c
1620 // pnacl-llc does:
1621 // mov t_b.lo, b.lo
1622 // mov t_b.hi, b.hi
1623 // mov t_c.lo, c.lo
1624 // lsr T0, t_b.lo, t_c.lo
1625 // rsb T1, t_c.lo, #32
1626 // orr t_a.lo, T0, t_b.hi, lsl T1
1627 // sub T2, t_c.lo, #32
1628 // cmp T2, #0
1629 // [al]srge t_a.lo, t_b.hi, T2
1630 // [al]sr t_a.hi, t_b.hi, t_c.lo
1631 // mov a.lo, t_a.lo
1632 // mov a.hi, t_a.hi
1633 //
1634 // GCC 4.8 does (lsr):
1635 // rsb t_c1, c.lo, #32
1636 // lsr t_lo, b.lo, c.lo
1637 // orr t_lo, t_lo, b.hi, lsl t_c1
1638 // sub t_c2, c.lo, #32
1639 // orr t_lo, t_lo, b.hi, lsr t_c2
1640 // lsr t_hi, b.hi, c.lo
1641 // mov a.lo, t_lo
1642 // mov a.hi, t_hi
1643 //
1644 // These are incompatible, therefore we mimic pnacl-llc.
1645 const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1646 Constant *_32 = Ctx->getConstantInt32(32);
1647 Constant *_0 = Ctx->getConstantZero(IceType_i32);
1648 Variable *Src1RLo = legalizeToReg(Src1Lo);
1649 Variable *T0 = makeReg(IceType_i32);
1650 Variable *T1 = makeReg(IceType_i32);
1651 Variable *T2 = makeReg(IceType_i32);
1652 Variable *TA_Lo = makeReg(IceType_i32);
1653 Variable *TA_Hi = makeReg(IceType_i32);
1654 _lsr(T0, Src0RLo, Src1RLo);
1655 _rsb(T1, Src1RLo, _32);
1656 _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1657 OperandARM32::LSL, T1));
1658 _sub(T2, Src1RLo, _32);
1659 _cmp(T2, _0);
1660 if (IsAshr) {
1661 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1662 _set_dest_redefined();
1663 _asr(TA_Hi, Src0RHi, Src1RLo);
1664 } else {
1665 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
1666 _set_dest_redefined();
1667 _lsr(TA_Hi, Src0RHi, Src1RLo);
1668 }
1669 _mov(DestLo, TA_Lo);
1670 _mov(DestHi, TA_Hi);
1671 return;
1672 }
1673 case InstArithmetic::Fadd:
1674 case InstArithmetic::Fsub:
1675 case InstArithmetic::Fmul:
1676 case InstArithmetic::Fdiv:
1677 case InstArithmetic::Frem:
1678 llvm_unreachable("FP instruction with i64 type");
1679 return;
1680 case InstArithmetic::Udiv:
1681 case InstArithmetic::Sdiv:
1682 case InstArithmetic::Urem:
1683 case InstArithmetic::Srem:
1684 llvm_unreachable("Call-helper-involved instruction for i64 type "
1685 "should have already been handled before");
1686 return;
1687 }
1688 return; 1805 return;
1689 } else if (isVectorType(Dest->getType())) { 1806 }
1807
1808 if (isVectorType(Dest->getType())) {
1690 // Add a fake def to keep liveness consistent in the meantime. 1809 // Add a fake def to keep liveness consistent in the meantime.
1691 Variable *T = makeReg(Dest->getType()); 1810 Variable *T = makeReg(Dest->getType());
1692 Context.insert(InstFakeDef::create(Func, T)); 1811 Context.insert(InstFakeDef::create(Func, T));
1693 _mov(Dest, T); 1812 _mov(Dest, T);
1694 UnimplementedError(Func->getContext()->getFlags()); 1813 UnimplementedError(Func->getContext()->getFlags());
1695 return; 1814 return;
1696 } 1815 }
1816
1697 // Dest->getType() is a non-i64 scalar. 1817 // Dest->getType() is a non-i64 scalar.
1698 Variable *Src0R = legalizeToReg(Src0); 1818 Variable *Src0R = legalizeToReg(Src0);
1699 Variable *T = makeReg(Dest->getType()); 1819 Variable *T = makeReg(Dest->getType());
1820
1700 // Handle div/rem separately. They require a non-legalized Src1 to inspect 1821 // Handle div/rem separately. They require a non-legalized Src1 to inspect
1701 // whether or not Src1 is a non-zero constant. Once legalized it is more 1822 // whether or not Src1 is a non-zero constant. Once legalized it is more
1702 // difficult to determine (constant may be moved to a register). 1823 // difficult to determine (constant may be moved to a register).
1703 switch (Inst->getOp()) { 1824 switch (Inst->getOp()) {
1704 default: 1825 default:
1705 break; 1826 break;
1706 case InstArithmetic::Udiv: { 1827 case InstArithmetic::Udiv: {
1707 constexpr bool NotRemainder = false; 1828 constexpr bool NotRemainder = false;
1708 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, 1829 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
1709 H_udiv_i32, NotRemainder); 1830 H_udiv_i32, NotRemainder);
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1766 Variable *Src1R = legalizeToReg(Src1); 1887 Variable *Src1R = legalizeToReg(Src1);
1767 _vdiv(T, Src0R, Src1R); 1888 _vdiv(T, Src0R, Src1R);
1768 _mov(Dest, T); 1889 _mov(Dest, T);
1769 return; 1890 return;
1770 } 1891 }
1771 } 1892 }
1772 1893
1773 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); 1894 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
1774 switch (Inst->getOp()) { 1895 switch (Inst->getOp()) {
1775 case InstArithmetic::_num: 1896 case InstArithmetic::_num:
1776 llvm_unreachable("Unknown arithmetic operator"); 1897 llvm::report_fatal_error("Unknown arithmetic operator");
1777 return; 1898 return;
1778 case InstArithmetic::Add: 1899 case InstArithmetic::Add:
1779 _add(T, Src0R, Src1RF); 1900 _add(T, Src0R, Src1RF);
1780 _mov(Dest, T); 1901 _mov(Dest, T);
1781 return; 1902 return;
1782 case InstArithmetic::And: 1903 case InstArithmetic::And:
1783 _and(T, Src0R, Src1RF); 1904 _and(T, Src0R, Src1RF);
1784 _mov(Dest, T); 1905 _mov(Dest, T);
1785 return; 1906 return;
1786 case InstArithmetic::Or: 1907 case InstArithmetic::Or:
(...skipping 29 matching lines...) Expand all
1816 if (Dest->getType() != IceType_i32) { 1937 if (Dest->getType() != IceType_i32) {
1817 _sxt(Src0R, Src0R); 1938 _sxt(Src0R, Src0R);
1818 } 1939 }
1819 _asr(T, Src0R, Src1RF); 1940 _asr(T, Src0R, Src1RF);
1820 _mov(Dest, T); 1941 _mov(Dest, T);
1821 return; 1942 return;
1822 case InstArithmetic::Udiv: 1943 case InstArithmetic::Udiv:
1823 case InstArithmetic::Sdiv: 1944 case InstArithmetic::Sdiv:
1824 case InstArithmetic::Urem: 1945 case InstArithmetic::Urem:
1825 case InstArithmetic::Srem: 1946 case InstArithmetic::Srem:
1826 llvm_unreachable("Integer div/rem should have been handled earlier."); 1947 llvm::report_fatal_error(
1948 "Integer div/rem should have been handled earlier.");
1827 return; 1949 return;
1828 case InstArithmetic::Fadd: 1950 case InstArithmetic::Fadd:
1829 case InstArithmetic::Fsub: 1951 case InstArithmetic::Fsub:
1830 case InstArithmetic::Fmul: 1952 case InstArithmetic::Fmul:
1831 case InstArithmetic::Fdiv: 1953 case InstArithmetic::Fdiv:
1832 case InstArithmetic::Frem: 1954 case InstArithmetic::Frem:
1833 llvm_unreachable("Floating point arith should have been handled earlier."); 1955 llvm::report_fatal_error(
1956 "Floating point arith should have been handled earlier.");
1834 return; 1957 return;
1835 } 1958 }
1836 } 1959 }
1837 1960
1838 void TargetARM32::lowerAssign(const InstAssign *Inst) { 1961 void TargetARM32::lowerAssign(const InstAssign *Inst) {
1839 Variable *Dest = Inst->getDest(); 1962 Variable *Dest = Inst->getDest();
1840 Operand *Src0 = Inst->getSrc(0); 1963 Operand *Src0 = Inst->getSrc(0);
1841 assert(Dest->getType() == Src0->getType()); 1964 assert(Dest->getType() == Src0->getType());
1842 if (Dest->getType() == IceType_i64) { 1965 if (Dest->getType() == IceType_i64) {
1843 Src0 = legalizeUndef(Src0); 1966 Src0 = legalizeUndef(Src0);
1967
1968 Variable *T_Lo = makeReg(IceType_i32);
1969 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
1844 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 1970 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
1845 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1846 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1847 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1848 Variable *T_Lo = makeReg(IceType_i32);
1849 Variable *T_Hi = makeReg(IceType_i32);
1850
1851 _mov(T_Lo, Src0Lo); 1971 _mov(T_Lo, Src0Lo);
1852 _mov(DestLo, T_Lo); 1972 _mov(DestLo, T_Lo);
1973
1974 Variable *T_Hi = makeReg(IceType_i32);
1975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1976 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
1853 _mov(T_Hi, Src0Hi); 1977 _mov(T_Hi, Src0Hi);
1854 _mov(DestHi, T_Hi); 1978 _mov(DestHi, T_Hi);
1979
1980 return;
1981 }
1982
1983 Operand *NewSrc;
1984 if (Dest->hasReg()) {
1985 // If Dest already has a physical register, then legalize the Src operand
1986 // into a Variable with the same register assignment. This especially
1987 // helps allow the use of Flex operands.
1988 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1855 } else { 1989 } else {
1856 Operand *NewSrc; 1990 // Dest could be a stack operand. Since we could potentially need to do a
1857 if (Dest->hasReg()) { 1991 // Store (and store can only have Register operands), legalize this to a
1858 // If Dest already has a physical register, then legalize the Src operand 1992 // register.
1859 // into a Variable with the same register assignment. This especially 1993 NewSrc = legalize(Src0, Legal_Reg);
1860 // helps allow the use of Flex operands.
1861 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1862 } else {
1863 // Dest could be a stack operand. Since we could potentially need to do a
1864 // Store (and store can only have Register operands), legalize this to a
1865 // register.
1866 NewSrc = legalize(Src0, Legal_Reg);
1867 }
1868 if (isVectorType(Dest->getType())) {
1869 Variable *SrcR = legalizeToReg(NewSrc);
1870 _mov(Dest, SrcR);
1871 } else if (isFloatingType(Dest->getType())) {
1872 Variable *SrcR = legalizeToReg(NewSrc);
1873 _mov(Dest, SrcR);
1874 } else {
1875 _mov(Dest, NewSrc);
1876 }
1877 } 1994 }
1995
1996 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
1997 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
1998 }
1999 _mov(Dest, NewSrc);
1878 } 2000 }
1879 2001
1880 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( 2002 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
1881 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue, 2003 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
1882 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) { 2004 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
1883 InstARM32Label *NewShortCircuitLabel = nullptr; 2005 InstARM32Label *NewShortCircuitLabel = nullptr;
1884 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex); 2006 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
1885 2007
1886 const Inst *Producer = BoolComputations.getProducerOf(Boolean); 2008 const Inst *Producer = BoolComputations.getProducerOf(Boolean);
1887 2009
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after
2573 struct { 2695 struct {
2574 CondARM32::Cond CC0; 2696 CondARM32::Cond CC0;
2575 CondARM32::Cond CC1; 2697 CondARM32::Cond CC1;
2576 } TableFcmp[] = { 2698 } TableFcmp[] = {
2577 #define X(val, CC0, CC1) \ 2699 #define X(val, CC0, CC1) \
2578 { CondARM32::CC0, CondARM32::CC1 } \ 2700 { CondARM32::CC0, CondARM32::CC1 } \
2579 , 2701 ,
2580 FCMPARM32_TABLE 2702 FCMPARM32_TABLE
2581 #undef X 2703 #undef X
2582 }; 2704 };
2705
sehr 2015/11/13 21:56:29 Is there a more common place for this sort of func
John 2015/11/13 22:00:41 Maybe. If you think it's useful, you could add fro
John 2015/11/14 00:00:38 Oh, I thought this was Jim. He had the same routin
2706 bool isFloatingPointZero(Operand *Src) {
2707 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
2708 return F32->getValue() == 0.0f;
2709 }
2710
2711 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
2712 return F64->getValue() == 0.0;
2713 }
2714
2715 return false;
2716 }
2583 } // end of anonymous namespace 2717 } // end of anonymous namespace
2584 2718
2585 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 2719 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
2586 InstFcmp::FCond Condition = Instr->getCondition(); 2720 InstFcmp::FCond Condition = Instr->getCondition();
2587 switch (Condition) { 2721 switch (Condition) {
2588 case InstFcmp::False: 2722 case InstFcmp::False:
2589 return CondWhenTrue(CondARM32::kNone); 2723 return CondWhenTrue(CondARM32::kNone);
2590 case InstFcmp::True: 2724 case InstFcmp::True:
2591 return CondWhenTrue(CondARM32::AL); 2725 return CondWhenTrue(CondARM32::AL);
2592 break; 2726 break;
2593 default: { 2727 default: {
2594 Variable *Src0R = legalizeToReg(Instr->getSrc(0)); 2728 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
2595 Variable *Src1R = legalizeToReg(Instr->getSrc(1)); 2729 Operand *Src1 = Instr->getSrc(1);
2596 _vcmp(Src0R, Src1R); 2730 if (isFloatingPointZero(Src1)) {
2731 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
2732 } else {
2733 _vcmp(Src0R, legalizeToReg(Src1));
2734 }
2597 _vmrs(); 2735 _vmrs();
2598 assert(Condition < llvm::array_lengthof(TableFcmp)); 2736 assert(Condition < llvm::array_lengthof(TableFcmp));
2599 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); 2737 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
2600 } 2738 }
2601 } 2739 }
2602 } 2740 }
2603 2741
2604 void TargetARM32::lowerFcmp(const InstFcmp *Instr) { 2742 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
2605 Variable *Dest = Instr->getDest(); 2743 Variable *Dest = Instr->getDest();
2606 if (isVectorType(Dest->getType())) { 2744 if (isVectorType(Dest->getType())) {
(...skipping 27 matching lines...) Expand all
2634 } else { 2772 } else {
2635 _mov(T, _1, Cond.WhenTrue0); 2773 _mov(T, _1, Cond.WhenTrue0);
2636 } 2774 }
2637 2775
2638 if (Cond.WhenTrue1 != CondARM32::kNone) { 2776 if (Cond.WhenTrue1 != CondARM32::kNone) {
2639 _mov_redefined(T, _1, Cond.WhenTrue1); 2777 _mov_redefined(T, _1, Cond.WhenTrue1);
2640 } 2778 }
2641 2779
2642 _mov(Dest, T); 2780 _mov(Dest, T);
2643 } 2781 }
2782 TargetARM32::CondWhenTrue
2783 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
2784 Operand *Src1) {
2785 size_t Index = static_cast<size_t>(Condition);
2786 assert(Index < llvm::array_lengthof(TableIcmp64));
2644 2787
2645 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { 2788 Operand *NonConstOp = nullptr;
2646 assert(Inst->getSrc(0)->getType() != IceType_i1); 2789 uint64_t Value;
2647 assert(Inst->getSrc(1)->getType() != IceType_i1); 2790 if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2791 Value = C->getValue();
2792 NonConstOp = Src0;
2793 } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) {
2794 Value = C->getValue();
2795 NonConstOp = Src1;
2796 }
2648 2797
2649 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2798 Variable *Src0RLo, *Src0RHi;
2650 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2799 Operand *Src1RFLo, *Src1RFHi;
2800
2801 if (NonConstOp != nullptr) {
2802 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
2803 Value == 0) {
2804 Variable *T = makeReg(IceType_i32);
2805 _orrs(T, legalizeToReg(loOperand(NonConstOp)),
2806 legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex));
2807 Context.insert(InstFakeUse::create(Func, T));
2808 return CondWhenTrue(TableIcmp64[Index].C1);
2809 }
2810
2811 Src0RLo = legalizeToReg(loOperand(NonConstOp));
2812 Src0RHi = legalizeToReg(hiOperand(NonConstOp));
2813 if ((Value >> 32) == (Value & 0xFFFFFFFF)) {
2814 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
2815 Legal_Reg | Legal_Flex);
2816 Src1RFHi = Src1RFLo;
2817 } else {
2818 Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
2819 Legal_Reg | Legal_Flex);
2820 Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF),
2821 Legal_Reg | Legal_Flex);
2822 }
2823
2824 bool UseRsb = false;
2825 if (TableIcmp64[Index].Swapped) {
2826 UseRsb = NonConstOp == Src0;
2827 } else {
2828 UseRsb = NonConstOp == Src1;
2829 }
2830
2831 if (UseRsb) {
2832 if (TableIcmp64[Index].IsSigned) {
2833 Variable *T = makeReg(IceType_i32);
2834 _rsbs(T, Src0RLo, Src1RFLo);
2835 Context.insert(InstFakeUse::create(Func, T));
2836
2837 T = makeReg(IceType_i32);
2838 _rscs(T, Src0RHi, Src1RFHi);
2839 // We need to add a FakeUse here because liveness gets mad at us (Def
2840 // without Use.) Note that flag-setting instructions are considered to
2841 // have side effects and, therefore, are not DCE'ed.
2842 Context.insert(InstFakeUse::create(Func, T));
2843 } else {
2844 Variable *T = makeReg(IceType_i32);
2845 _rsbs(T, Src0RHi, Src1RFHi);
2846 Context.insert(InstFakeUse::create(Func, T));
2847
2848 T = makeReg(IceType_i32);
2849 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
2850 Context.insert(InstFakeUse::create(Func, T));
2851 }
2852 } else {
2853 if (TableIcmp64[Index].IsSigned) {
2854 _cmp(Src0RLo, Src1RFLo);
2855 Variable *T = makeReg(IceType_i32);
2856 _sbcs(T, Src0RHi, Src1RFHi);
2857 Context.insert(InstFakeUse::create(Func, T));
2858 } else {
2859 _cmp(Src0RHi, Src1RFHi);
2860 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
2861 }
2862 }
2863
2864 return CondWhenTrue(TableIcmp64[Index].C1);
2865 }
2866
2867 if (TableIcmp64[Index].Swapped) {
2868 Src0RLo = legalizeToReg(loOperand(Src1));
2869 Src0RHi = legalizeToReg(hiOperand(Src1));
2870 Src1RFLo = legalizeToReg(loOperand(Src0));
2871 Src1RFHi = legalizeToReg(hiOperand(Src0));
2872 } else {
2873 Src0RLo = legalizeToReg(loOperand(Src0));
2874 Src0RHi = legalizeToReg(hiOperand(Src0));
2875 Src1RFLo = legalizeToReg(loOperand(Src1));
2876 Src1RFHi = legalizeToReg(hiOperand(Src1));
2877 }
2651 2878
2652 // a=icmp cond, b, c ==> 2879 // a=icmp cond, b, c ==>
2653 // GCC does: 2880 // GCC does:
2654 // cmp b.hi, c.hi or cmp b.lo, c.lo 2881 // cmp b.hi, c.hi or cmp b.lo, c.lo
2655 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 2882 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2656 // mov.<C1> t, #1 mov.<C1> t, #1 2883 // mov.<C1> t, #1 mov.<C1> t, #1
2657 // mov.<C2> t, #0 mov.<C2> t, #0 2884 // mov.<C2> t, #0 mov.<C2> t, #0
2658 // mov a, t mov a, t 2885 // mov a, t mov a, t
2659 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 2886 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2660 // is used for signed compares. In some cases, b and c need to be swapped as 2887 // is used for signed compares. In some cases, b and c need to be swapped as
(...skipping 10 matching lines...) Expand all
2671 // that's nice in that it's just as short but has fewer dependencies for 2898 // that's nice in that it's just as short but has fewer dependencies for
2672 // better ILP at the cost of more registers. 2899 // better ILP at the cost of more registers.
2673 // 2900 //
2674 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two 2901 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2675 // unconditional mov #0, two cmps, two conditional mov #1, and one 2902 // unconditional mov #0, two cmps, two conditional mov #1, and one
2676 // conditional reg mov. That has few dependencies for good ILP, but is a 2903 // conditional reg mov. That has few dependencies for good ILP, but is a
2677 // longer sequence. 2904 // longer sequence.
2678 // 2905 //
2679 // So, we are going with the GCC version since it's usually better (except 2906 // So, we are going with the GCC version since it's usually better (except
2680 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 2907 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
2908 if (TableIcmp64[Index].IsSigned) {
2909 Variable *ScratchReg = makeReg(IceType_i32);
2910 _cmp(Src0RLo, Src1RFLo);
2911 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
2912 // ScratchReg isn't going to be used, but we need the side-effect of
2913 // setting flags from this operation.
2914 Context.insert(InstFakeUse::create(Func, ScratchReg));
2915 } else {
2916 _cmp(Src0RHi, Src1RFHi);
2917 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
2918 }
2919 return CondWhenTrue(TableIcmp64[Index].C1);
2920 }
2681 2921
2682 if (Src0->getType() == IceType_i64) { 2922 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
2683 InstIcmp::ICond Conditon = Inst->getCondition(); 2923 assert(Inst->getSrc(0)->getType() != IceType_i1);
2684 size_t Index = static_cast<size_t>(Conditon); 2924 assert(Inst->getSrc(1)->getType() != IceType_i1);
2685 assert(Index < llvm::array_lengthof(TableIcmp64));
2686 Variable *Src0Lo, *Src0Hi;
2687 Operand *Src1LoRF, *Src1HiRF;
2688 if (TableIcmp64[Index].Swapped) {
2689 Src0Lo = legalizeToReg(loOperand(Src1));
2690 Src0Hi = legalizeToReg(hiOperand(Src1));
2691 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2692 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2693 } else {
2694 Src0Lo = legalizeToReg(loOperand(Src0));
2695 Src0Hi = legalizeToReg(hiOperand(Src0));
2696 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2697 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2698 }
2699 if (TableIcmp64[Index].IsSigned) {
2700 Variable *ScratchReg = makeReg(IceType_i32);
2701 _cmp(Src0Lo, Src1LoRF);
2702 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2703 // ScratchReg isn't going to be used, but we need the side-effect of
2704 // setting flags from this operation.
2705 Context.insert(InstFakeUse::create(Func, ScratchReg));
2706 } else {
2707 _cmp(Src0Hi, Src1HiRF);
2708 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2709 }
2710 return CondWhenTrue(TableIcmp64[Index].C1);
2711 }
2712 2925
2926 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
2927 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
2928
2929 InstIcmp::ICond Condition = Inst->getCondition();
2713 // a=icmp cond b, c ==> 2930 // a=icmp cond b, c ==>
2714 // GCC does: 2931 // GCC does:
2715 // <u/s>xtb tb, b 2932 // <u/s>xtb tb, b
2716 // <u/s>xtb tc, c 2933 // <u/s>xtb tc, c
2717 // cmp tb, tc 2934 // cmp tb, tc
2718 // mov.C1 t, #0 2935 // mov.C1 t, #0
2719 // mov.C2 t, #1 2936 // mov.C2 t, #1
2720 // mov a, t 2937 // mov a, t
2721 // where the unsigned/sign extension is not needed for 32-bit. They also have 2938 // where the unsigned/sign extension is not needed for 32-bit. They also have
2722 // special cases for EQ and NE. E.g., for NE: 2939 // special cases for EQ and NE. E.g., for NE:
2723 // <extend to tb, tc> 2940 // <extend to tb, tc>
2724 // subs t, tb, tc 2941 // subs t, tb, tc
2725 // movne t, #1 2942 // movne t, #1
2726 // mov a, t 2943 // mov a, t
2727 // 2944 //
2728 // LLVM does: 2945 // LLVM does:
2729 // lsl tb, b, #<N> 2946 // lsl tb, b, #<N>
2730 // mov t, #0 2947 // mov t, #0
2731 // cmp tb, c, lsl #<N> 2948 // cmp tb, c, lsl #<N>
2732 // mov.<C> t, #1 2949 // mov.<C> t, #1
2733 // mov a, t 2950 // mov a, t
2734 // 2951 //
2735 // the left shift is by 0, 16, or 24, which allows the comparison to focus on 2952 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2736 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For 2953 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2737 // the unsigned case, for some reason it does similar to GCC and does a uxtb 2954 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2738 // first. It's not clear to me why that special-casing is needed. 2955 // first. It's not clear to me why that special-casing is needed.
2739 // 2956 //
2740 // We'll go with the LLVM way for now, since it's shorter and has just as few 2957 // We'll go with the LLVM way for now, since it's shorter and has just as few
2741 // dependencies. 2958 // dependencies.
2742 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 2959 Operand *NonConstOp = nullptr;
2743 assert(ShiftAmt >= 0); 2960 int32_t Value;
2744 Constant *ShiftConst = nullptr; 2961 if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2745 Variable *Src0R = nullptr; 2962 Value = C->getValue();
2746 if (ShiftAmt) { 2963 NonConstOp = Src0;
2747 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 2964 } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) {
2748 Src0R = makeReg(IceType_i32); 2965 Value = C->getValue();
2749 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 2966 NonConstOp = Src1;
2750 } else {
2751 Src0R = legalizeToReg(Src0);
2752 } 2967 }
2753 if (ShiftAmt) { 2968
2969 switch (Src0->getType()) {
2970 default:
2971 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
2972 case IceType_i64:
2973 return lowerInt64IcmpCond(Condition, Src0, Src1);
2974 case IceType_i8:
2975 case IceType_i16: {
2976 int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
2977 assert(ShAmt >= 0);
2978
2979 if (NonConstOp != nullptr) {
2980 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
2981 Value == 0) {
2982 Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
2983 Variable *T = makeReg(IceType_i32);
2984 _lsls(T, legalizeToReg(NonConstOp), ShAmtOp);
2985 Context.insert(InstFakeUse::create(Func, T));
2986 return CondWhenTrue(getIcmp32Mapping(Condition));
2987 }
2988 Variable *ConstR = makeReg(IceType_i32);
2989 _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt),
2990 Legal_Reg | Legal_Flex));
2991 Operand *NonConstF = OperandARM32FlexReg::create(
2992 Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL,
2993 Ctx->getConstantInt32(ShAmt));
2994
2995 if (Src1 == NonConstOp) {
2996 _cmp(ConstR, NonConstF);
2997 } else {
2998 Variable *T = makeReg(IceType_i32);
2999 _rsbs(T, ConstR, NonConstF);
3000 Context.insert(InstFakeUse::create(Func, T));
3001 }
3002 return CondWhenTrue(getIcmp32Mapping(Condition));
3003 }
3004
3005 Variable *Src0R = makeReg(IceType_i32);
3006 Operand *ShAmtF =
3007 legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
3008 _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
3009
2754 Variable *Src1R = legalizeToReg(Src1); 3010 Variable *Src1R = legalizeToReg(Src1);
2755 OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( 3011 OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
2756 Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); 3012 Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
2757 _cmp(Src0R, Src1RShifted); 3013 _cmp(Src0R, Src1F);
2758 } else { 3014 return CondWhenTrue(getIcmp32Mapping(Condition));
2759 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2760 _cmp(Src0R, Src1RF);
2761 } 3015 }
2762 return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); 3016 case IceType_i32: {
3017 if (NonConstOp != nullptr) {
3018 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
3019 Value == 0) {
3020 Variable *T = makeReg(IceType_i32);
3021 Variable *OpR = legalizeToReg(NonConstOp);
3022 _orrs(T, OpR, OpR);
3023 Context.insert(InstFakeUse::create(Func, T));
3024 return CondWhenTrue(getIcmp32Mapping(Condition));
3025 }
3026
3027 Operand *ConstRF =
3028 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
3029 Variable *NonConstR = legalizeToReg(NonConstOp);
3030
3031 if (Src0 == NonConstOp) {
3032 _cmp(NonConstR, ConstRF);
3033 } else {
3034 Variable *T = makeReg(IceType_i32);
3035 _rsbs(T, NonConstR, ConstRF);
3036 Context.insert(InstFakeUse::create(Func, T));
3037 }
3038 return CondWhenTrue(getIcmp32Mapping(Condition));
3039 }
3040
3041 Variable *Src0R = legalizeToReg(Src0);
3042 Variable *Src1R = legalizeToReg(Src1);
3043 _cmp(Src0R, Src1R);
3044 return CondWhenTrue(getIcmp32Mapping(Condition));
3045 }
3046 }
2763 } 3047 }
2764 3048
2765 void TargetARM32::lowerIcmp(const InstIcmp *Inst) { 3049 void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
2766 Variable *Dest = Inst->getDest(); 3050 Variable *Dest = Inst->getDest();
2767 3051
2768 if (isVectorType(Dest->getType())) { 3052 if (isVectorType(Dest->getType())) {
2769 Variable *T = makeReg(Dest->getType()); 3053 Variable *T = makeReg(Dest->getType());
2770 Context.insert(InstFakeDef::create(Func, T)); 3054 Context.insert(InstFakeDef::create(Func, T));
2771 _mov(Dest, T); 3055 _mov(Dest, T);
2772 UnimplementedError(Func->getContext()->getFlags()); 3056 UnimplementedError(Func->getContext()->getFlags());
(...skipping 1474 matching lines...) Expand 10 before | Expand all | Expand 10 after
4247 } 4531 }
4248 return Reg; 4532 return Reg;
4249 } 4533 }
4250 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 4534 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
4251 Variable *Reg = makeReg(Ty, RegNum); 4535 Variable *Reg = makeReg(Ty, RegNum);
4252 _movw(Reg, C); 4536 _movw(Reg, C);
4253 _movt(Reg, C); 4537 _movt(Reg, C);
4254 return Reg; 4538 return Reg;
4255 } else { 4539 } else {
4256 assert(isScalarFloatingType(Ty)); 4540 assert(isScalarFloatingType(Ty));
4541 uint32_t ModifiedImm;
4542 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
4543 Variable *T = makeReg(Ty, RegNum);
4544 _mov(T,
4545 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
4546 return T;
4547 }
4548
4257 // Load floats/doubles from literal pool. 4549 // Load floats/doubles from literal pool.
4258 // TODO(jvoung): Allow certain immediates to be encoded directly in an
4259 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
4260 // immediate constants". Or, for 32-bit floating point numbers, just
4261 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
4262 // instead of using a movw/movt pair to get the const-pool address then
4263 // loading to SREG.
4264 std::string Buffer; 4550 std::string Buffer;
4265 llvm::raw_string_ostream StrBuf(Buffer); 4551 llvm::raw_string_ostream StrBuf(Buffer);
4266 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 4552 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
4267 llvm::cast<Constant>(From)->setShouldBePooled(true); 4553 llvm::cast<Constant>(From)->setShouldBePooled(true);
4268 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4554 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4269 Variable *BaseReg = makeReg(getPointerType()); 4555 Variable *BaseReg = makeReg(getPointerType());
4270 _movw(BaseReg, Offset); 4556 _movw(BaseReg, Offset);
4271 _movt(BaseReg, Offset); 4557 _movt(BaseReg, Offset);
4272 From = formMemoryOperand(BaseReg, Ty); 4558 From = formMemoryOperand(BaseReg, Ty);
4273 return copyToReg(From, RegNum); 4559 return copyToReg(From, RegNum);
(...skipping 625 matching lines...) Expand 10 before | Expand all | Expand 10 after
4899 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5185 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
4900 // However, for compatibility with current NaCl LLVM, don't claim that. 5186 // However, for compatibility with current NaCl LLVM, don't claim that.
4901 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5187 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
4902 } 5188 }
4903 5189
4904 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5190 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
4905 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5191 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
4906 llvm::SmallBitVector TargetARM32::ScratchRegs; 5192 llvm::SmallBitVector TargetARM32::ScratchRegs;
4907 5193
4908 } // end of namespace Ice 5194 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698