OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 1269 matching lines...) |
1280 // multiple of the required alignment at runtime. | 1280 // multiple of the required alignment at runtime. |
1281 Variable *T = makeReg(IceType_i32); | 1281 Variable *T = makeReg(IceType_i32); |
1282 _mov(T, TotalSize); | 1282 _mov(T, TotalSize); |
1283 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1283 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
1284 _and(T, Ctx->getConstantInt32(-Alignment)); | 1284 _and(T, Ctx->getConstantInt32(-Alignment)); |
1285 _sub(esp, T); | 1285 _sub(esp, T); |
1286 } | 1286 } |
1287 _mov(Dest, esp); | 1287 _mov(Dest, esp); |
1288 } | 1288 } |
1289 | 1289 |
| 1290 // Strength-reduce scalar integer multiplication by a constant (for |
| 1291 // i32 or narrower) for certain constants. The lea instruction can be |
| 1292 // used to multiply by 3, 5, or 9, and the shl instruction can be used |
| 1293 // to multiply by powers of 2. These can be combined such that |
| 1294 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1295 // combined with left-shifting by 2. |
| 1296 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1297 int32_t Src1) { |
| 1298 // Disable this optimization for Om1 and O0, just to keep things |
| 1299 // simple there. |
| 1300 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1301 return false; |
| 1302 Type Ty = Dest->getType(); |
| 1303 Variable *T = nullptr; |
| 1304 if (Src1 == -1) { |
| 1305 _mov(T, Src0); |
| 1306 _neg(T); |
| 1307 _mov(Dest, T); |
| 1308 return true; |
| 1309 } |
| 1310 if (Src1 == 0) { |
| 1311 _mov(Dest, Ctx->getConstantZero(Ty)); |
| 1312 return true; |
| 1313 } |
| 1314 if (Src1 == 1) { |
| 1315 _mov(T, Src0); |
| 1316 _mov(Dest, T); |
| 1317 return true; |
| 1318 } |
| 1319 // Don't bother with the edge case where Src1 == MININT. |
| 1320 if (Src1 == -Src1) |
| 1321 return false; |
| 1322 const bool Src1IsNegative = Src1 < 0; |
| 1323 if (Src1IsNegative) |
| 1324 Src1 = -Src1; |
| 1325 uint32_t Count9 = 0; |
| 1326 uint32_t Count5 = 0; |
| 1327 uint32_t Count3 = 0; |
| 1328 uint32_t Count2 = 0; |
| 1329 uint32_t CountOps = 0; |
| 1330 while (Src1 > 1) { |
| 1331 if (Src1 % 9 == 0) { |
| 1332 ++CountOps; |
| 1333 ++Count9; |
| 1334 Src1 /= 9; |
| 1335 } else if (Src1 % 5 == 0) { |
| 1336 ++CountOps; |
| 1337 ++Count5; |
| 1338 Src1 /= 5; |
| 1339 } else if (Src1 % 3 == 0) { |
| 1340 ++CountOps; |
| 1341 ++Count3; |
| 1342 Src1 /= 3; |
| 1343 } else if (Src1 % 2 == 0) { |
| 1344 if (Count2 == 0) |
| 1345 ++CountOps; |
| 1346 ++Count2; |
| 1347 Src1 /= 2; |
| 1348 } else { |
| 1349 return false; |
| 1350 } |
| 1351 } |
| 1352 // Lea optimization only works for i16 and i32 types, not i8. |
| 1353 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| 1354 return false; |
| 1355 // Limit the number of lea/shl operations for a single multiply, to |
| 1356 // a somewhat arbitrary choice of 3. |
| 1357 const uint32_t MaxOpsForOptimizedMul = 3; |
| 1358 if (CountOps > MaxOpsForOptimizedMul) |
| 1359 return false; |
| 1360 _mov(T, Src0); |
| 1361 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1362 for (uint32_t i = 0; i < Count9; ++i) { |
| 1363 const uint16_t Shift = 3; // log2(9-1) |
| 1364 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1365 _set_dest_nonkillable(); |
| 1366 } |
| 1367 for (uint32_t i = 0; i < Count5; ++i) { |
| 1368 const uint16_t Shift = 2; // log2(5-1) |
| 1369 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1370 _set_dest_nonkillable(); |
| 1371 } |
| 1372 for (uint32_t i = 0; i < Count3; ++i) { |
| 1373 const uint16_t Shift = 1; // log2(3-1) |
| 1374 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); |
| 1375 _set_dest_nonkillable(); |
| 1376 } |
| 1377 if (Count2) { |
| 1378 _shl(T, Ctx->getConstantInt(Ty, Count2)); |
| 1379 } |
| 1380 if (Src1IsNegative) |
| 1381 _neg(T); |
| 1382 _mov(Dest, T); |
| 1383 return true; |
| 1384 } |
| 1385 |
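
A minimal standalone sketch of the decomposition optimizeScalarMul performs (not part of the patch; the helper name and the main() driver are illustrative): the multiplier is peeled into factors of 9, 5, 3, and 2, each factor of 9, 5, or 3 maps to one lea of the form T + T*8, T + T*4, or T + T*2, and all factors of 2 fold into a single shl, so multiplying by 100 = 5 * 5 * 2 * 2 costs two lea instructions and one shift.

// Standalone illustration; unsigned arithmetic is used so the shifts wrap the
// same way the 32-bit hardware does.
#include <cassert>
#include <cstdint>

static int32_t mulByDecomposition(int32_t Src, uint32_t Multiplier) {
  // Mirror of the factor-counting loop; Multiplier >= 1 is assumed here
  // (the real code handles 0, 1, -1, and negative multipliers separately).
  uint32_t Count9 = 0, Count5 = 0, Count3 = 0, Count2 = 0;
  while (Multiplier > 1) {
    if (Multiplier % 9 == 0) { ++Count9; Multiplier /= 9; }
    else if (Multiplier % 5 == 0) { ++Count5; Multiplier /= 5; }
    else if (Multiplier % 3 == 0) { ++Count3; Multiplier /= 3; }
    else if (Multiplier % 2 == 0) { ++Count2; Multiplier /= 2; }
    else return 0; // not expressible with lea/shl alone; the lowering falls back to imul
  }
  uint32_t T = static_cast<uint32_t>(Src);
  for (uint32_t i = 0; i < Count9; ++i) T += T << 3; // lea T, [T + 8*T]
  for (uint32_t i = 0; i < Count5; ++i) T += T << 2; // lea T, [T + 4*T]
  for (uint32_t i = 0; i < Count3; ++i) T += T << 1; // lea T, [T + 2*T]
  T <<= Count2;                                      // shl T, Count2
  return static_cast<int32_t>(T);
}

int main() {
  assert(mulByDecomposition(7, 100) == 700);  // 100 = 5 * 5 * 2 * 2
  assert(mulByDecomposition(-3, 45) == -135); // 45 = 9 * 5
  return 0;
}
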
1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1386 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
1291 Variable *Dest = Inst->getDest(); | 1387 Variable *Dest = Inst->getDest(); |
1292 Operand *Src0 = legalize(Inst->getSrc(0)); | 1388 Operand *Src0 = legalize(Inst->getSrc(0)); |
1293 Operand *Src1 = legalize(Inst->getSrc(1)); | 1389 Operand *Src1 = legalize(Inst->getSrc(1)); |
1294 if (Inst->isCommutative()) { | 1390 if (Inst->isCommutative()) { |
1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1391 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
1296 std::swap(Src0, Src1); | 1392 std::swap(Src0, Src1); |
| 1393 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
| 1394 std::swap(Src0, Src1); |
1297 } | 1395 } |
1298 if (Dest->getType() == IceType_i64) { | 1396 if (Dest->getType() == IceType_i64) { |
1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1397 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1398 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1301 Operand *Src0Lo = loOperand(Src0); | 1399 Operand *Src0Lo = loOperand(Src0); |
1302 Operand *Src0Hi = hiOperand(Src0); | 1400 Operand *Src0Hi = hiOperand(Src0); |
1303 Operand *Src1Lo = loOperand(Src1); | 1401 Operand *Src1Lo = loOperand(Src1); |
1304 Operand *Src1Hi = hiOperand(Src1); | 1402 Operand *Src1Hi = hiOperand(Src1); |
1305 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1403 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
1306 switch (Inst->getOp()) { | 1404 switch (Inst->getOp()) { |
(...skipping 207 matching lines...) |
1514 lowerCall(Call); | 1612 lowerCall(Call); |
1515 } break; | 1613 } break; |
1516 case InstArithmetic::Fadd: | 1614 case InstArithmetic::Fadd: |
1517 case InstArithmetic::Fsub: | 1615 case InstArithmetic::Fsub: |
1518 case InstArithmetic::Fmul: | 1616 case InstArithmetic::Fmul: |
1519 case InstArithmetic::Fdiv: | 1617 case InstArithmetic::Fdiv: |
1520 case InstArithmetic::Frem: | 1618 case InstArithmetic::Frem: |
1521 llvm_unreachable("FP instruction with i64 type"); | 1619 llvm_unreachable("FP instruction with i64 type"); |
1522 break; | 1620 break; |
1523 } | 1621 } |
1524 } else if (isVectorType(Dest->getType())) { | 1622 return; |
| 1623 } |
| 1624 if (isVectorType(Dest->getType())) { |
1525 // TODO: Trap on integer divide and integer modulo by zero. | 1625 // TODO: Trap on integer divide and integer modulo by zero. |
1526 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1626 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
1527 if (llvm::isa<OperandX8632Mem>(Src1)) | 1627 if (llvm::isa<OperandX8632Mem>(Src1)) |
1528 Src1 = legalizeToVar(Src1); | 1628 Src1 = legalizeToVar(Src1); |
1529 switch (Inst->getOp()) { | 1629 switch (Inst->getOp()) { |
1530 case InstArithmetic::_num: | 1630 case InstArithmetic::_num: |
1531 llvm_unreachable("Unknown arithmetic operator"); | 1631 llvm_unreachable("Unknown arithmetic operator"); |
1532 break; | 1632 break; |
1533 case InstArithmetic::Add: { | 1633 case InstArithmetic::Add: { |
1534 Variable *T = makeReg(Dest->getType()); | 1634 Variable *T = makeReg(Dest->getType()); |
(...skipping 108 matching lines...) |
1643 case InstArithmetic::Fdiv: { | 1743 case InstArithmetic::Fdiv: { |
1644 Variable *T = makeReg(Dest->getType()); | 1744 Variable *T = makeReg(Dest->getType()); |
1645 _movp(T, Src0); | 1745 _movp(T, Src0); |
1646 _divps(T, Src1); | 1746 _divps(T, Src1); |
1647 _movp(Dest, T); | 1747 _movp(Dest, T); |
1648 } break; | 1748 } break; |
1649 case InstArithmetic::Frem: | 1749 case InstArithmetic::Frem: |
1650 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1750 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1651 break; | 1751 break; |
1652 } | 1752 } |
1653 } else { // Dest->getType() is non-i64 scalar | 1753 return; |
1654 Variable *T_edx = nullptr; | 1754 } |
1655 Variable *T = nullptr; | 1755 Variable *T_edx = nullptr; |
1656 switch (Inst->getOp()) { | 1756 Variable *T = nullptr; |
1657 case InstArithmetic::_num: | 1757 switch (Inst->getOp()) { |
1658 llvm_unreachable("Unknown arithmetic operator"); | 1758 case InstArithmetic::_num: |
1659 break; | 1759 llvm_unreachable("Unknown arithmetic operator"); |
1660 case InstArithmetic::Add: | 1760 break; |
| 1761 case InstArithmetic::Add: |
| 1762 _mov(T, Src0); |
| 1763 _add(T, Src1); |
| 1764 _mov(Dest, T); |
| 1765 break; |
| 1766 case InstArithmetic::And: |
| 1767 _mov(T, Src0); |
| 1768 _and(T, Src1); |
| 1769 _mov(Dest, T); |
| 1770 break; |
| 1771 case InstArithmetic::Or: |
| 1772 _mov(T, Src0); |
| 1773 _or(T, Src1); |
| 1774 _mov(Dest, T); |
| 1775 break; |
| 1776 case InstArithmetic::Xor: |
| 1777 _mov(T, Src0); |
| 1778 _xor(T, Src1); |
| 1779 _mov(Dest, T); |
| 1780 break; |
| 1781 case InstArithmetic::Sub: |
| 1782 _mov(T, Src0); |
| 1783 _sub(T, Src1); |
| 1784 _mov(Dest, T); |
| 1785 break; |
| 1786 case InstArithmetic::Mul: |
| 1787 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1788 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
| 1789 return; |
| 1790 } |
| 1791 // The 8-bit version of imul only allows the form "imul r/m8" |
| 1792 // where T must be in eax. |
| 1793 if (isByteSizedArithType(Dest->getType())) { |
| 1794 _mov(T, Src0, RegX8632::Reg_eax); |
| 1795 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1796 } else { |
1661 _mov(T, Src0); | 1797 _mov(T, Src0); |
1662 _add(T, Src1); | 1798 } |
1663 _mov(Dest, T); | 1799 _imul(T, Src1); |
1664 break; | 1800 _mov(Dest, T); |
1665 case InstArithmetic::And: | 1801 break; |
1666 _mov(T, Src0); | 1802 case InstArithmetic::Shl: |
1667 _and(T, Src1); | 1803 _mov(T, Src0); |
1668 _mov(Dest, T); | 1804 if (!llvm::isa<Constant>(Src1)) |
1669 break; | 1805 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); |
1670 case InstArithmetic::Or: | 1806 _shl(T, Src1); |
1671 _mov(T, Src0); | 1807 _mov(Dest, T); |
1672 _or(T, Src1); | 1808 break; |
1673 _mov(Dest, T); | 1809 case InstArithmetic::Lshr: |
1674 break; | 1810 _mov(T, Src0); |
1675 case InstArithmetic::Xor: | 1811 if (!llvm::isa<Constant>(Src1)) |
1676 _mov(T, Src0); | 1812 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); |
1677 _xor(T, Src1); | 1813 _shr(T, Src1); |
1678 _mov(Dest, T); | 1814 _mov(Dest, T); |
1679 break; | 1815 break; |
1680 case InstArithmetic::Sub: | 1816 case InstArithmetic::Ashr: |
1681 _mov(T, Src0); | 1817 _mov(T, Src0); |
1682 _sub(T, Src1); | 1818 if (!llvm::isa<Constant>(Src1)) |
1683 _mov(Dest, T); | 1819 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); |
1684 break; | 1820 _sar(T, Src1); |
1685 case InstArithmetic::Mul: | 1821 _mov(Dest, T); |
1686 // TODO: Optimize for llvm::isa<Constant>(Src1) | 1822 break; |
1687 // TODO: Strength-reduce multiplications by a constant, | 1823 case InstArithmetic::Udiv: |
1688 // particularly -1 and powers of 2. Advanced: use lea to | 1824 // div and idiv are the few arithmetic operators that do not allow |
1689 // multiply by 3, 5, 9. | 1825 // immediates as the operand. |
1690 // | 1826 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1691 // The 8-bit version of imul only allows the form "imul r/m8" | 1827 if (isByteSizedArithType(Dest->getType())) { |
1692 // where T must be in eax. | 1828 Variable *T_ah = nullptr; |
1693 if (isByteSizedArithType(Dest->getType())) { | 1829 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
1694 _mov(T, Src0, RegX8632::Reg_eax); | 1830 _mov(T, Src0, RegX8632::Reg_eax); |
1695 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1831 _mov(T_ah, Zero, RegX8632::Reg_ah); |
1696 } else { | 1832 _div(T, Src1, T_ah); |
1697 _mov(T, Src0); | 1833 _mov(Dest, T); |
| 1834 } else { |
| 1835 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1836 _mov(T, Src0, RegX8632::Reg_eax); |
| 1837 _mov(T_edx, Zero, RegX8632::Reg_edx); |
| 1838 _div(T, Src1, T_edx); |
| 1839 _mov(Dest, T); |
| 1840 } |
| 1841 break; |
| 1842 case InstArithmetic::Sdiv: |
| 1843 // TODO(stichnot): Enable this after doing better performance |
| 1844 // and cross testing. |
| 1845 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| 1846 // Optimize division by constant power of 2, but not for Om1 |
| 1847 // or O0, just to keep things simple there. |
| 1848 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| 1849 int32_t Divisor = C->getValue(); |
| 1850 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
| 1851 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 1852 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 1853 Type Ty = Dest->getType(); |
| 1854 // LLVM does the following for dest=src/(1<<log): |
| 1855 // t=src |
| 1856 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 1857 // shr t,typewidth-log |
| 1858 // add t,src |
| 1859 // sar t,log |
| 1860 // dest=t |
| 1861 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 1862 _mov(T, Src0); |
| 1863 // If for some reason we are dividing by 1, just treat it |
| 1864 // like an assignment. |
| 1865 if (LogDiv > 0) { |
| 1866 // The initial sar is unnecessary when dividing by 2. |
| 1867 if (LogDiv > 1) |
| 1868 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 1869 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 1870 _add(T, Src0); |
| 1871 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| 1872 } |
| 1873 _mov(Dest, T); |
| 1874 return; |
| 1875 } |
1698 } | 1876 } |
1699 _imul(T, Src1); | 1877 } |
1700 _mov(Dest, T); | 1878 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1701 break; | 1879 if (isByteSizedArithType(Dest->getType())) { |
1702 case InstArithmetic::Shl: | 1880 _mov(T, Src0, RegX8632::Reg_eax); |
1703 _mov(T, Src0); | 1881 _cbwdq(T, T); |
1704 if (!llvm::isa<Constant>(Src1)) | 1882 _idiv(T, Src1, T); |
1705 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1883 _mov(Dest, T); |
1706 _shl(T, Src1); | 1884 } else { |
1707 _mov(Dest, T); | 1885 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
1708 break; | 1886 _mov(T, Src0, RegX8632::Reg_eax); |
1709 case InstArithmetic::Lshr: | 1887 _cbwdq(T_edx, T); |
1710 _mov(T, Src0); | 1888 _idiv(T, Src1, T_edx); |
1711 if (!llvm::isa<Constant>(Src1)) | 1889 _mov(Dest, T); |
1712 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1890 } |
1713 _shr(T, Src1); | 1891 break; |
1714 _mov(Dest, T); | 1892 case InstArithmetic::Urem: |
1715 break; | 1893 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1716 case InstArithmetic::Ashr: | 1894 if (isByteSizedArithType(Dest->getType())) { |
1717 _mov(T, Src0); | 1895 Variable *T_ah = nullptr; |
1718 if (!llvm::isa<Constant>(Src1)) | 1896 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
1719 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | 1897 _mov(T, Src0, RegX8632::Reg_eax); |
1720 _sar(T, Src1); | 1898 _mov(T_ah, Zero, RegX8632::Reg_ah); |
1721 _mov(Dest, T); | 1899 _div(T_ah, Src1, T); |
1722 break; | 1900 _mov(Dest, T_ah); |
1723 case InstArithmetic::Udiv: | 1901 } else { |
1724 // div and idiv are the few arithmetic operators that do not allow | 1902 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1725 // immediates as the operand. | 1903 _mov(T_edx, Zero, RegX8632::Reg_edx); |
1726 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1904 _mov(T, Src0, RegX8632::Reg_eax); |
1727 if (isByteSizedArithType(Dest->getType())) { | 1905 _div(T_edx, Src1, T); |
1728 Variable *T_ah = nullptr; | 1906 _mov(Dest, T_edx); |
1729 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1907 } |
1730 _mov(T, Src0, RegX8632::Reg_eax); | 1908 break; |
1731 _mov(T_ah, Zero, RegX8632::Reg_ah); | 1909 case InstArithmetic::Srem: |
1732 _div(T, Src1, T_ah); | 1910 // TODO(stichnot): Enable this after doing better performance |
1733 _mov(Dest, T); | 1911 // and cross testing. |
1734 } else { | 1912 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1735 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1913 // Optimize mod by constant power of 2, but not for Om1 or O0, |
1736 _mov(T, Src0, RegX8632::Reg_eax); | 1914 // just to keep things simple there. |
1737 _mov(T_edx, Zero, RegX8632::Reg_edx); | 1915 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1738 _div(T, Src1, T_edx); | 1916 int32_t Divisor = C->getValue(); |
1739 _mov(Dest, T); | 1917 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
| 1918 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
| 1919 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
| 1920 Type Ty = Dest->getType(); |
| 1921 // LLVM does the following for dest=src%(1<<log): |
| 1922 // t=src |
| 1923 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
| 1924 // shr t,typewidth-log |
| 1925 // add t,src |
| 1926 // and t, -(1<<log) |
| 1927 // sub t,src |
| 1928 // neg t |
| 1929 // dest=t |
| 1930 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); |
| 1931 // If for some reason we are dividing by 1, just assign 0. |
| 1932 if (LogDiv == 0) { |
| 1933 _mov(Dest, Ctx->getConstantZero(Ty)); |
| 1934 return; |
| 1935 } |
| 1936 _mov(T, Src0); |
| 1937 // The initial sar is unnecessary when dividing by 2. |
| 1938 if (LogDiv > 1) |
| 1939 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
| 1940 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
| 1941 _add(T, Src0); |
| 1942 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 1943 _sub(T, Src0); |
| 1944 _neg(T); |
| 1945 _mov(Dest, T); |
| 1946 return; |
| 1947 } |
1740 } | 1948 } |
1741 break; | 1949 } |
1742 case InstArithmetic::Sdiv: | 1950 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1743 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1951 if (isByteSizedArithType(Dest->getType())) { |
1744 if (isByteSizedArithType(Dest->getType())) { | 1952 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); |
1745 _mov(T, Src0, RegX8632::Reg_eax); | 1953 _mov(T, Src0, RegX8632::Reg_eax); |
1746 _cbwdq(T, T); | 1954 _cbwdq(T, T); |
1747 _idiv(T, Src1, T); | 1955 Context.insert(InstFakeDef::create(Func, T_ah)); |
1748 _mov(Dest, T); | 1956 _idiv(T_ah, Src1, T); |
1749 } else { | 1957 _mov(Dest, T_ah); |
1750 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 1958 } else { |
1751 _mov(T, Src0, RegX8632::Reg_eax); | 1959 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
1752 _cbwdq(T_edx, T); | 1960 _mov(T, Src0, RegX8632::Reg_eax); |
1753 _idiv(T, Src1, T_edx); | 1961 _cbwdq(T_edx, T); |
1754 _mov(Dest, T); | 1962 _idiv(T_edx, Src1, T); |
1755 } | 1963 _mov(Dest, T_edx); |
1756 break; | 1964 } |
1757 case InstArithmetic::Urem: | 1965 break; |
1758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1966 case InstArithmetic::Fadd: |
1759 if (isByteSizedArithType(Dest->getType())) { | 1967 _mov(T, Src0); |
1760 Variable *T_ah = nullptr; | 1968 _addss(T, Src1); |
1761 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1969 _mov(Dest, T); |
1762 _mov(T, Src0, RegX8632::Reg_eax); | 1970 break; |
1763 _mov(T_ah, Zero, RegX8632::Reg_ah); | 1971 case InstArithmetic::Fsub: |
1764 _div(T_ah, Src1, T); | 1972 _mov(T, Src0); |
1765 _mov(Dest, T_ah); | 1973 _subss(T, Src1); |
1766 } else { | 1974 _mov(Dest, T); |
1767 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1975 break; |
1768 _mov(T_edx, Zero, RegX8632::Reg_edx); | 1976 case InstArithmetic::Fmul: |
1769 _mov(T, Src0, RegX8632::Reg_eax); | 1977 _mov(T, Src0); |
1770 _div(T_edx, Src1, T); | 1978 _mulss(T, Src1); |
1771 _mov(Dest, T_edx); | 1979 _mov(Dest, T); |
1772 } | 1980 break; |
1773 break; | 1981 case InstArithmetic::Fdiv: |
1774 case InstArithmetic::Srem: | 1982 _mov(T, Src0); |
1775 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1983 _divss(T, Src1); |
1776 if (isByteSizedArithType(Dest->getType())) { | 1984 _mov(Dest, T); |
1777 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); | 1985 break; |
1778 _mov(T, Src0, RegX8632::Reg_eax); | 1986 case InstArithmetic::Frem: { |
1779 _cbwdq(T, T); | 1987 const SizeT MaxSrcs = 2; |
1780 Context.insert(InstFakeDef::create(Func, T_ah)); | 1988 Type Ty = Dest->getType(); |
1781 _idiv(T_ah, Src1, T); | 1989 InstCall *Call = makeHelperCall( |
1782 _mov(Dest, T_ah); | 1990 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
1783 } else { | 1991 Call->addArg(Src0); |
1784 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | 1992 Call->addArg(Src1); |
1785 _mov(T, Src0, RegX8632::Reg_eax); | 1993 return lowerCall(Call); |
1786 _cbwdq(T_edx, T); | 1994 } |
1787 _idiv(T_edx, Src1, T); | |
1788 _mov(Dest, T_edx); | |
1789 } | |
1790 break; | |
1791 case InstArithmetic::Fadd: | |
1792 _mov(T, Src0); | |
1793 _addss(T, Src1); | |
1794 _mov(Dest, T); | |
1795 break; | |
1796 case InstArithmetic::Fsub: | |
1797 _mov(T, Src0); | |
1798 _subss(T, Src1); | |
1799 _mov(Dest, T); | |
1800 break; | |
1801 case InstArithmetic::Fmul: | |
1802 _mov(T, Src0); | |
1803 _mulss(T, Src1); | |
1804 _mov(Dest, T); | |
1805 break; | |
1806 case InstArithmetic::Fdiv: | |
1807 _mov(T, Src0); | |
1808 _divss(T, Src1); | |
1809 _mov(Dest, T); | |
1810 break; | |
1811 case InstArithmetic::Frem: { | |
1812 const SizeT MaxSrcs = 2; | |
1813 Type Ty = Dest->getType(); | |
1814 InstCall *Call = | |
1815 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, | |
1816 Dest, MaxSrcs); | |
1817 Call->addArg(Src0); | |
1818 Call->addArg(Src1); | |
1819 return lowerCall(Call); | |
1820 } break; | |
1821 } | |
1822 } | 1995 } |
1823 } | 1996 } |
1824 | 1997 |
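
The Sdiv and Srem cases above quote LLVM's branch-free sequences for signed division and remainder by a power of two (sar/shr/add/sar, and sar/shr/add/and/sub/neg); both are gated off in this patch by the "if (false && ...)" checks pending performance and cross testing. A minimal standalone sketch of those sequences, not part of the patch: the function names are illustrative, and it assumes arithmetic right shift for signed values, which the sar instruction used by the lowering guarantees.

#include <cassert>
#include <cstdint>

// dest = src / (1 << LogDiv), rounding toward zero, without a branch:
// the bias (1 << LogDiv) - 1 is added only when src is negative.
static int32_t sdivPow2(int32_t Src, uint32_t LogDiv) {
  int32_t T = Src >> 31;                                               // sar t, 31
  T = static_cast<int32_t>(static_cast<uint32_t>(T) >> (32 - LogDiv)); // shr t, 32-log
  T += Src;                                                            // add t, src
  return T >> LogDiv;                                                  // sar t, log
}

// dest = src % (1 << LogDiv): mask the biased value down to a multiple of the
// divisor, then subtract it from src (the lowering does sub followed by neg).
static int32_t sremPow2(int32_t Src, uint32_t LogDiv) {
  int32_t T = Src >> 31;
  T = static_cast<int32_t>(static_cast<uint32_t>(T) >> (32 - LogDiv));
  T += Src;
  T &= -(1 << LogDiv);                                                 // and t, -(1<<log)
  return Src - T;                                                      // sub t, src; neg t
}

int main() {
  for (int32_t Src : {-100, -7, -1, 0, 1, 7, 100}) {
    for (uint32_t LogDiv : {1u, 2u, 5u}) {
      const int32_t Divisor = 1 << LogDiv;
      assert(sdivPow2(Src, LogDiv) == Src / Divisor);
      assert(sremPow2(Src, LogDiv) == Src % Divisor);
    }
  }
  return 0;
}
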
1825 void TargetX8632::lowerAssign(const InstAssign *Inst) { | 1998 void TargetX8632::lowerAssign(const InstAssign *Inst) { |
1826 Variable *Dest = Inst->getDest(); | 1999 Variable *Dest = Inst->getDest(); |
1827 Operand *Src0 = Inst->getSrc(0); | 2000 Operand *Src0 = Inst->getSrc(0); |
1828 assert(Dest->getType() == Src0->getType()); | 2001 assert(Dest->getType() == Src0->getType()); |
1829 if (Dest->getType() == IceType_i64) { | 2002 if (Dest->getType() == IceType_i64) { |
1830 Src0 = legalize(Src0); | 2003 Src0 = legalize(Src0); |
1831 Operand *Src0Lo = loOperand(Src0); | 2004 Operand *Src0Lo = loOperand(Src0); |
(...skipping 1280 matching lines...) |
3112 Context.insert( | 3285 Context.insert( |
3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3286 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
3114 return; | 3287 return; |
3115 } | 3288 } |
3116 case Intrinsics::AtomicRMW: | 3289 case Intrinsics::AtomicRMW: |
3117 if (!Intrinsics::isMemoryOrderValid( | 3290 if (!Intrinsics::isMemoryOrderValid( |
3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3291 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
3119 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3292 Func->setError("Unexpected memory ordering for AtomicRMW"); |
3120 return; | 3293 return; |
3121 } | 3294 } |
3122 lowerAtomicRMW(Instr->getDest(), | 3295 lowerAtomicRMW( |
3123 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 3296 Instr->getDest(), |
3124 Instr->getArg(0))->getValue()), | 3297 static_cast<uint32_t>( |
3125 Instr->getArg(1), Instr->getArg(2)); | 3298 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
| 3299 Instr->getArg(1), Instr->getArg(2)); |
3126 return; | 3300 return; |
3127 case Intrinsics::AtomicStore: { | 3301 case Intrinsics::AtomicStore: { |
3128 if (!Intrinsics::isMemoryOrderValid( | 3302 if (!Intrinsics::isMemoryOrderValid( |
3129 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3303 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
3130 Func->setError("Unexpected memory ordering for AtomicStore"); | 3304 Func->setError("Unexpected memory ordering for AtomicStore"); |
3131 return; | 3305 return; |
3132 } | 3306 } |
3133 // We require the memory address to be naturally aligned. | 3307 // We require the memory address to be naturally aligned. |
3134 // Given that is the case, then normal stores are atomic. | 3308 // Given that is the case, then normal stores are atomic. |
3135 // Add a fence after the store to make it visible. | 3309 // Add a fence after the store to make it visible. |
(...skipping 1877 matching lines...) |
5013 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5187 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
5014 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5188 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
5015 } break; | 5189 } break; |
5016 } | 5190 } |
5017 } | 5191 } |
5018 | 5192 |
5019 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | 5193 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
5020 : TargetHeaderLowering(Ctx) {} | 5194 : TargetHeaderLowering(Ctx) {} |
5021 | 5195 |
5022 } // end of namespace Ice | 5196 } // end of namespace Ice |