Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(385)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1146803002: Subzero: Strength-reduce mul by certain constants. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Remove a TODO Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/assembler/x86/immediate_encodings.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 1269 matching lines...) Expand 10 before | Expand all | Expand 10 after
1280 // multiple of the required alignment at runtime. 1280 // multiple of the required alignment at runtime.
1281 Variable *T = makeReg(IceType_i32); 1281 Variable *T = makeReg(IceType_i32);
1282 _mov(T, TotalSize); 1282 _mov(T, TotalSize);
1283 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1283 _add(T, Ctx->getConstantInt32(Alignment - 1));
1284 _and(T, Ctx->getConstantInt32(-Alignment)); 1284 _and(T, Ctx->getConstantInt32(-Alignment));
1285 _sub(esp, T); 1285 _sub(esp, T);
1286 } 1286 }
1287 _mov(Dest, esp); 1287 _mov(Dest, esp);
1288 } 1288 }
1289 1289
1290 // Strength-reduce scalar integer multiplication by a constant (for
1291 // i32 or narrower) for certain constants. The lea instruction can be
1292 // used to multiply by 3, 5, or 9, and the shl instruction can be used
1293 // to multiply by powers of 2. These can be combined such that
1294 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1295 // combined with left-shifting by 2.
1296 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
1297 int32_t Src1) {
1298 // Disable this optimization for Om1 and O0, just to keep things
1299 // simple there.
1300 if (Ctx->getFlags().getOptLevel() < Opt_1)
1301 return false;
1302 Type Ty = Dest->getType();
1303 Variable *T = nullptr;
1304 if (Src1 == -1) {
1305 _mov(T, Src0);
1306 _neg(T);
1307 _mov(Dest, T);
1308 return true;
1309 }
1310 if (Src1 == 0) {
1311 _mov(Dest, Ctx->getConstantZero(Ty));
1312 return true;
1313 }
1314 if (Src1 == 1) {
1315 _mov(T, Src0);
1316 _mov(Dest, T);
1317 return true;
1318 }
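1319 // Don't bother with the edge case where Src1 == MININT (0 was handled above, so MININT is the only value for which Src1 == -Src1 under two's-complement wraparound).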
1320 if (Src1 == -Src1)
1321 return false;
1322 const bool Src1IsNegative = Src1 < 0;
1323 if (Src1IsNegative)
1324 Src1 = -Src1;
1325 uint32_t Count9 = 0;
1326 uint32_t Count5 = 0;
1327 uint32_t Count3 = 0;
1328 uint32_t Count2 = 0;
1329 uint32_t CountOps = 0;
1330 while (Src1 > 1) {
1331 if (Src1 % 9 == 0) {
1332 ++CountOps;
1333 ++Count9;
1334 Src1 /= 9;
1335 } else if (Src1 % 5 == 0) {
1336 ++CountOps;
1337 ++Count5;
1338 Src1 /= 5;
1339 } else if (Src1 % 3 == 0) {
1340 ++CountOps;
1341 ++Count3;
1342 Src1 /= 3;
1343 } else if (Src1 % 2 == 0) {
1344 if (Count2 == 0)
1345 ++CountOps;
1346 ++Count2;
1347 Src1 /= 2;
1348 } else {
1349 return false;
1350 }
1351 }
1352 // Lea optimization only works for i16 and i32 types, not i8.
1353 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1354 return false;
1355 // Limit the number of lea/shl operations for a single multiply, to
1356 // a somewhat arbitrary choice of 3.
1357 const uint32_t MaxOpsForOptimizedMul = 3;
1358 if (CountOps > MaxOpsForOptimizedMul)
1359 return false;
1360 _mov(T, Src0);
1361 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1362 for (uint32_t i = 0; i < Count9; ++i) {
1363 const uint16_t Shift = 3; // log2(9-1)
1364 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1365 _set_dest_nonkillable();
1366 }
1367 for (uint32_t i = 0; i < Count5; ++i) {
1368 const uint16_t Shift = 2; // log2(5-1)
1369 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1370 _set_dest_nonkillable();
1371 }
1372 for (uint32_t i = 0; i < Count3; ++i) {
1373 const uint16_t Shift = 1; // log2(3-1)
1374 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1375 _set_dest_nonkillable();
1376 }
1377 if (Count2) {
1378 _shl(T, Ctx->getConstantInt(Ty, Count2));
1379 }
1380 if (Src1IsNegative)
1381 _neg(T);
1382 _mov(Dest, T);
1383 return true;
1384 }
1385
1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1386 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1291 Variable *Dest = Inst->getDest(); 1387 Variable *Dest = Inst->getDest();
1292 Operand *Src0 = legalize(Inst->getSrc(0)); 1388 Operand *Src0 = legalize(Inst->getSrc(0));
1293 Operand *Src1 = legalize(Inst->getSrc(1)); 1389 Operand *Src1 = legalize(Inst->getSrc(1));
1294 if (Inst->isCommutative()) { 1390 if (Inst->isCommutative()) {
1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1391 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1296 std::swap(Src0, Src1); 1392 std::swap(Src0, Src1);
1393 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1394 std::swap(Src0, Src1);
1297 } 1395 }
1298 if (Dest->getType() == IceType_i64) { 1396 if (Dest->getType() == IceType_i64) {
1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1397 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1398 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1301 Operand *Src0Lo = loOperand(Src0); 1399 Operand *Src0Lo = loOperand(Src0);
1302 Operand *Src0Hi = hiOperand(Src0); 1400 Operand *Src0Hi = hiOperand(Src0);
1303 Operand *Src1Lo = loOperand(Src1); 1401 Operand *Src1Lo = loOperand(Src1);
1304 Operand *Src1Hi = hiOperand(Src1); 1402 Operand *Src1Hi = hiOperand(Src1);
1305 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1403 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1306 switch (Inst->getOp()) { 1404 switch (Inst->getOp()) {
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
1514 lowerCall(Call); 1612 lowerCall(Call);
1515 } break; 1613 } break;
1516 case InstArithmetic::Fadd: 1614 case InstArithmetic::Fadd:
1517 case InstArithmetic::Fsub: 1615 case InstArithmetic::Fsub:
1518 case InstArithmetic::Fmul: 1616 case InstArithmetic::Fmul:
1519 case InstArithmetic::Fdiv: 1617 case InstArithmetic::Fdiv:
1520 case InstArithmetic::Frem: 1618 case InstArithmetic::Frem:
1521 llvm_unreachable("FP instruction with i64 type"); 1619 llvm_unreachable("FP instruction with i64 type");
1522 break; 1620 break;
1523 } 1621 }
1524 } else if (isVectorType(Dest->getType())) { 1622 return;
1623 }
1624 if (isVectorType(Dest->getType())) {
1525 // TODO: Trap on integer divide and integer modulo by zero. 1625 // TODO: Trap on integer divide and integer modulo by zero.
1526 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1626 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1527 if (llvm::isa<OperandX8632Mem>(Src1)) 1627 if (llvm::isa<OperandX8632Mem>(Src1))
1528 Src1 = legalizeToVar(Src1); 1628 Src1 = legalizeToVar(Src1);
1529 switch (Inst->getOp()) { 1629 switch (Inst->getOp()) {
1530 case InstArithmetic::_num: 1630 case InstArithmetic::_num:
1531 llvm_unreachable("Unknown arithmetic operator"); 1631 llvm_unreachable("Unknown arithmetic operator");
1532 break; 1632 break;
1533 case InstArithmetic::Add: { 1633 case InstArithmetic::Add: {
1534 Variable *T = makeReg(Dest->getType()); 1634 Variable *T = makeReg(Dest->getType());
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
1643 case InstArithmetic::Fdiv: { 1743 case InstArithmetic::Fdiv: {
1644 Variable *T = makeReg(Dest->getType()); 1744 Variable *T = makeReg(Dest->getType());
1645 _movp(T, Src0); 1745 _movp(T, Src0);
1646 _divps(T, Src1); 1746 _divps(T, Src1);
1647 _movp(Dest, T); 1747 _movp(Dest, T);
1648 } break; 1748 } break;
1649 case InstArithmetic::Frem: 1749 case InstArithmetic::Frem:
1650 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1750 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1651 break; 1751 break;
1652 } 1752 }
1653 } else { // Dest->getType() is non-i64 scalar 1753 return;
1654 Variable *T_edx = nullptr; 1754 }
1655 Variable *T = nullptr; 1755 Variable *T_edx = nullptr;
1656 switch (Inst->getOp()) { 1756 Variable *T = nullptr;
1657 case InstArithmetic::_num: 1757 switch (Inst->getOp()) {
1658 llvm_unreachable("Unknown arithmetic operator"); 1758 case InstArithmetic::_num:
1659 break; 1759 llvm_unreachable("Unknown arithmetic operator");
1660 case InstArithmetic::Add: 1760 break;
1761 case InstArithmetic::Add:
1762 _mov(T, Src0);
1763 _add(T, Src1);
1764 _mov(Dest, T);
1765 break;
1766 case InstArithmetic::And:
1767 _mov(T, Src0);
1768 _and(T, Src1);
1769 _mov(Dest, T);
1770 break;
1771 case InstArithmetic::Or:
1772 _mov(T, Src0);
1773 _or(T, Src1);
1774 _mov(Dest, T);
1775 break;
1776 case InstArithmetic::Xor:
1777 _mov(T, Src0);
1778 _xor(T, Src1);
1779 _mov(Dest, T);
1780 break;
1781 case InstArithmetic::Sub:
1782 _mov(T, Src0);
1783 _sub(T, Src1);
1784 _mov(Dest, T);
1785 break;
1786 case InstArithmetic::Mul:
1787 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1788 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1789 return;
1790 }
1791 // The 8-bit version of imul only allows the form "imul r/m8"
1792 // where T must be in eax.
1793 if (isByteSizedArithType(Dest->getType())) {
1794 _mov(T, Src0, RegX8632::Reg_eax);
1795 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1796 } else {
1661 _mov(T, Src0); 1797 _mov(T, Src0);
1662 _add(T, Src1); 1798 }
1663 _mov(Dest, T); 1799 _imul(T, Src1);
1664 break; 1800 _mov(Dest, T);
1665 case InstArithmetic::And: 1801 break;
1666 _mov(T, Src0); 1802 case InstArithmetic::Shl:
1667 _and(T, Src1); 1803 _mov(T, Src0);
1668 _mov(Dest, T); 1804 if (!llvm::isa<Constant>(Src1))
1669 break; 1805 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
1670 case InstArithmetic::Or: 1806 _shl(T, Src1);
1671 _mov(T, Src0); 1807 _mov(Dest, T);
1672 _or(T, Src1); 1808 break;
1673 _mov(Dest, T); 1809 case InstArithmetic::Lshr:
1674 break; 1810 _mov(T, Src0);
1675 case InstArithmetic::Xor: 1811 if (!llvm::isa<Constant>(Src1))
1676 _mov(T, Src0); 1812 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
1677 _xor(T, Src1); 1813 _shr(T, Src1);
1678 _mov(Dest, T); 1814 _mov(Dest, T);
1679 break; 1815 break;
1680 case InstArithmetic::Sub: 1816 case InstArithmetic::Ashr:
1681 _mov(T, Src0); 1817 _mov(T, Src0);
1682 _sub(T, Src1); 1818 if (!llvm::isa<Constant>(Src1))
1683 _mov(Dest, T); 1819 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
1684 break; 1820 _sar(T, Src1);
1685 case InstArithmetic::Mul: 1821 _mov(Dest, T);
1686 // TODO: Optimize for llvm::isa<Constant>(Src1) 1822 break;
1687 // TODO: Strength-reduce multiplications by a constant, 1823 case InstArithmetic::Udiv:
1688 // particularly -1 and powers of 2. Advanced: use lea to 1824 // div and idiv are the few arithmetic operators that do not allow
1689 // multiply by 3, 5, 9. 1825 // immediates as the operand.
1690 // 1826 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1691 // The 8-bit version of imul only allows the form "imul r/m8" 1827 if (isByteSizedArithType(Dest->getType())) {
1692 // where T must be in eax. 1828 Variable *T_ah = nullptr;
1693 if (isByteSizedArithType(Dest->getType())) { 1829 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1694 _mov(T, Src0, RegX8632::Reg_eax); 1830 _mov(T, Src0, RegX8632::Reg_eax);
1695 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1831 _mov(T_ah, Zero, RegX8632::Reg_ah);
1696 } else { 1832 _div(T, Src1, T_ah);
1697 _mov(T, Src0); 1833 _mov(Dest, T);
1834 } else {
1835 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1836 _mov(T, Src0, RegX8632::Reg_eax);
1837 _mov(T_edx, Zero, RegX8632::Reg_edx);
1838 _div(T, Src1, T_edx);
1839 _mov(Dest, T);
1840 }
1841 break;
1842 case InstArithmetic::Sdiv:
1843 // TODO(stichnot): Enable this after doing better performance
1844 // and cross testing.
1845 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1846 // Optimize division by constant power of 2, but not for Om1
1847 // or O0, just to keep things simple there.
1848 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1849 int32_t Divisor = C->getValue();
1850 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1851 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1852 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1853 Type Ty = Dest->getType();
1854 // LLVM does the following for dest=src/(1<<log):
1855 // t=src
1856 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1857 // shr t,typewidth-log
1858 // add t,src
1859 // sar t,log
1860 // dest=t
1861 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
1862 _mov(T, Src0);
1863 // If for some reason we are dividing by 1, just treat it
1864 // like an assignment.
1865 if (LogDiv > 0) {
1866 // The initial sar is unnecessary when dividing by 2.
1867 if (LogDiv > 1)
1868 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1869 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1870 _add(T, Src0);
1871 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1872 }
1873 _mov(Dest, T);
1874 return;
1875 }
1698 } 1876 }
1699 _imul(T, Src1); 1877 }
1700 _mov(Dest, T); 1878 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1701 break; 1879 if (isByteSizedArithType(Dest->getType())) {
1702 case InstArithmetic::Shl: 1880 _mov(T, Src0, RegX8632::Reg_eax);
1703 _mov(T, Src0); 1881 _cbwdq(T, T);
1704 if (!llvm::isa<Constant>(Src1)) 1882 _idiv(T, Src1, T);
1705 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1883 _mov(Dest, T);
1706 _shl(T, Src1); 1884 } else {
1707 _mov(Dest, T); 1885 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1708 break; 1886 _mov(T, Src0, RegX8632::Reg_eax);
1709 case InstArithmetic::Lshr: 1887 _cbwdq(T_edx, T);
1710 _mov(T, Src0); 1888 _idiv(T, Src1, T_edx);
1711 if (!llvm::isa<Constant>(Src1)) 1889 _mov(Dest, T);
1712 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1890 }
1713 _shr(T, Src1); 1891 break;
1714 _mov(Dest, T); 1892 case InstArithmetic::Urem:
1715 break; 1893 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1716 case InstArithmetic::Ashr: 1894 if (isByteSizedArithType(Dest->getType())) {
1717 _mov(T, Src0); 1895 Variable *T_ah = nullptr;
1718 if (!llvm::isa<Constant>(Src1)) 1896 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1719 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); 1897 _mov(T, Src0, RegX8632::Reg_eax);
1720 _sar(T, Src1); 1898 _mov(T_ah, Zero, RegX8632::Reg_ah);
1721 _mov(Dest, T); 1899 _div(T_ah, Src1, T);
1722 break; 1900 _mov(Dest, T_ah);
1723 case InstArithmetic::Udiv: 1901 } else {
1724 // div and idiv are the few arithmetic operators that do not allow 1902 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1725 // immediates as the operand. 1903 _mov(T_edx, Zero, RegX8632::Reg_edx);
1726 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1904 _mov(T, Src0, RegX8632::Reg_eax);
1727 if (isByteSizedArithType(Dest->getType())) { 1905 _div(T_edx, Src1, T);
1728 Variable *T_ah = nullptr; 1906 _mov(Dest, T_edx);
1729 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1907 }
1730 _mov(T, Src0, RegX8632::Reg_eax); 1908 break;
1731 _mov(T_ah, Zero, RegX8632::Reg_ah); 1909 case InstArithmetic::Srem:
1732 _div(T, Src1, T_ah); 1910 // TODO(stichnot): Enable this after doing better performance
1733 _mov(Dest, T); 1911 // and cross testing.
1734 } else { 1912 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1735 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1913 // Optimize mod by constant power of 2, but not for Om1 or O0,
1736 _mov(T, Src0, RegX8632::Reg_eax); 1914 // just to keep things simple there.
1737 _mov(T_edx, Zero, RegX8632::Reg_edx); 1915 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1738 _div(T, Src1, T_edx); 1916 int32_t Divisor = C->getValue();
1739 _mov(Dest, T); 1917 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1918 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1919 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1920 Type Ty = Dest->getType();
1921 // LLVM does the following for dest=src%(1<<log):
1922 // t=src
1923 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1924 // shr t,typewidth-log
1925 // add t,src
1926 // and t, -(1<<log)
1927 // sub t,src
1928 // neg t
1929 // dest=t
1930 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
1931 // If for some reason we are dividing by 1, just assign 0.
1932 if (LogDiv == 0) {
1933 _mov(Dest, Ctx->getConstantZero(Ty));
1934 return;
1935 }
1936 _mov(T, Src0);
1937 // The initial sar is unnecessary when dividing by 2.
1938 if (LogDiv > 1)
1939 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1940 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1941 _add(T, Src0);
1942 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1943 _sub(T, Src0);
1944 _neg(T);
1945 _mov(Dest, T);
1946 return;
1947 }
1740 } 1948 }
1741 break; 1949 }
1742 case InstArithmetic::Sdiv: 1950 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1743 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1951 if (isByteSizedArithType(Dest->getType())) {
1744 if (isByteSizedArithType(Dest->getType())) { 1952 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1745 _mov(T, Src0, RegX8632::Reg_eax); 1953 _mov(T, Src0, RegX8632::Reg_eax);
1746 _cbwdq(T, T); 1954 _cbwdq(T, T);
1747 _idiv(T, Src1, T); 1955 Context.insert(InstFakeDef::create(Func, T_ah));
1748 _mov(Dest, T); 1956 _idiv(T_ah, Src1, T);
1749 } else { 1957 _mov(Dest, T_ah);
1750 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 1958 } else {
1751 _mov(T, Src0, RegX8632::Reg_eax); 1959 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1752 _cbwdq(T_edx, T); 1960 _mov(T, Src0, RegX8632::Reg_eax);
1753 _idiv(T, Src1, T_edx); 1961 _cbwdq(T_edx, T);
1754 _mov(Dest, T); 1962 _idiv(T_edx, Src1, T);
1755 } 1963 _mov(Dest, T_edx);
1756 break; 1964 }
1757 case InstArithmetic::Urem: 1965 break;
1758 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1966 case InstArithmetic::Fadd:
1759 if (isByteSizedArithType(Dest->getType())) { 1967 _mov(T, Src0);
1760 Variable *T_ah = nullptr; 1968 _addss(T, Src1);
1761 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1969 _mov(Dest, T);
1762 _mov(T, Src0, RegX8632::Reg_eax); 1970 break;
1763 _mov(T_ah, Zero, RegX8632::Reg_ah); 1971 case InstArithmetic::Fsub:
1764 _div(T_ah, Src1, T); 1972 _mov(T, Src0);
1765 _mov(Dest, T_ah); 1973 _subss(T, Src1);
1766 } else { 1974 _mov(Dest, T);
1767 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1975 break;
1768 _mov(T_edx, Zero, RegX8632::Reg_edx); 1976 case InstArithmetic::Fmul:
1769 _mov(T, Src0, RegX8632::Reg_eax); 1977 _mov(T, Src0);
1770 _div(T_edx, Src1, T); 1978 _mulss(T, Src1);
1771 _mov(Dest, T_edx); 1979 _mov(Dest, T);
1772 } 1980 break;
1773 break; 1981 case InstArithmetic::Fdiv:
1774 case InstArithmetic::Srem: 1982 _mov(T, Src0);
1775 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1983 _divss(T, Src1);
1776 if (isByteSizedArithType(Dest->getType())) { 1984 _mov(Dest, T);
1777 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); 1985 break;
1778 _mov(T, Src0, RegX8632::Reg_eax); 1986 case InstArithmetic::Frem: {
1779 _cbwdq(T, T); 1987 const SizeT MaxSrcs = 2;
1780 Context.insert(InstFakeDef::create(Func, T_ah)); 1988 Type Ty = Dest->getType();
1781 _idiv(T_ah, Src1, T); 1989 InstCall *Call = makeHelperCall(
1782 _mov(Dest, T_ah); 1990 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
1783 } else { 1991 Call->addArg(Src0);
1784 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); 1992 Call->addArg(Src1);
1785 _mov(T, Src0, RegX8632::Reg_eax); 1993 return lowerCall(Call);
1786 _cbwdq(T_edx, T); 1994 }
1787 _idiv(T_edx, Src1, T);
1788 _mov(Dest, T_edx);
1789 }
1790 break;
1791 case InstArithmetic::Fadd:
1792 _mov(T, Src0);
1793 _addss(T, Src1);
1794 _mov(Dest, T);
1795 break;
1796 case InstArithmetic::Fsub:
1797 _mov(T, Src0);
1798 _subss(T, Src1);
1799 _mov(Dest, T);
1800 break;
1801 case InstArithmetic::Fmul:
1802 _mov(T, Src0);
1803 _mulss(T, Src1);
1804 _mov(Dest, T);
1805 break;
1806 case InstArithmetic::Fdiv:
1807 _mov(T, Src0);
1808 _divss(T, Src1);
1809 _mov(Dest, T);
1810 break;
1811 case InstArithmetic::Frem: {
1812 const SizeT MaxSrcs = 2;
1813 Type Ty = Dest->getType();
1814 InstCall *Call =
1815 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,
1816 Dest, MaxSrcs);
1817 Call->addArg(Src0);
1818 Call->addArg(Src1);
1819 return lowerCall(Call);
1820 } break;
1821 }
1822 } 1995 }
1823 } 1996 }
1824 1997
1825 void TargetX8632::lowerAssign(const InstAssign *Inst) { 1998 void TargetX8632::lowerAssign(const InstAssign *Inst) {
1826 Variable *Dest = Inst->getDest(); 1999 Variable *Dest = Inst->getDest();
1827 Operand *Src0 = Inst->getSrc(0); 2000 Operand *Src0 = Inst->getSrc(0);
1828 assert(Dest->getType() == Src0->getType()); 2001 assert(Dest->getType() == Src0->getType());
1829 if (Dest->getType() == IceType_i64) { 2002 if (Dest->getType() == IceType_i64) {
1830 Src0 = legalize(Src0); 2003 Src0 = legalize(Src0);
1831 Operand *Src0Lo = loOperand(Src0); 2004 Operand *Src0Lo = loOperand(Src0);
(...skipping 1280 matching lines...) Expand 10 before | Expand all | Expand 10 after
3112 Context.insert( 3285 Context.insert(
3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 3286 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3114 return; 3287 return;
3115 } 3288 }
3116 case Intrinsics::AtomicRMW: 3289 case Intrinsics::AtomicRMW:
3117 if (!Intrinsics::isMemoryOrderValid( 3290 if (!Intrinsics::isMemoryOrderValid(
3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 3291 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3119 Func->setError("Unexpected memory ordering for AtomicRMW"); 3292 Func->setError("Unexpected memory ordering for AtomicRMW");
3120 return; 3293 return;
3121 } 3294 }
3122 lowerAtomicRMW(Instr->getDest(), 3295 lowerAtomicRMW(
3123 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( 3296 Instr->getDest(),
3124 Instr->getArg(0))->getValue()), 3297 static_cast<uint32_t>(
3125 Instr->getArg(1), Instr->getArg(2)); 3298 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3299 Instr->getArg(1), Instr->getArg(2));
3126 return; 3300 return;
3127 case Intrinsics::AtomicStore: { 3301 case Intrinsics::AtomicStore: {
3128 if (!Intrinsics::isMemoryOrderValid( 3302 if (!Intrinsics::isMemoryOrderValid(
3129 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 3303 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3130 Func->setError("Unexpected memory ordering for AtomicStore"); 3304 Func->setError("Unexpected memory ordering for AtomicStore");
3131 return; 3305 return;
3132 } 3306 }
3133 // We require the memory address to be naturally aligned. 3307 // We require the memory address to be naturally aligned.
3134 // Given that is the case, then normal stores are atomic. 3308 // Given that is the case, then normal stores are atomic.
3135 // Add a fence after the store to make it visible. 3309 // Add a fence after the store to make it visible.
(...skipping 1877 matching lines...) Expand 10 before | Expand all | Expand 10 after
5013 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5187 emitConstantPool<PoolTypeConverter<float>>(Ctx);
5014 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5188 emitConstantPool<PoolTypeConverter<double>>(Ctx);
5015 } break; 5189 } break;
5016 } 5190 }
5017 } 5191 }
5018 5192
5019 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) 5193 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
5020 : TargetHeaderLowering(Ctx) {} 5194 : TargetHeaderLowering(Ctx) {}
5021 5195
5022 } // end of namespace Ice 5196 } // end of namespace Ice
OLD | NEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/assembler/x86/immediate_encodings.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698