src/IceTargetLoweringX8632.cpp - Issue 1146803002: Subzero: Strength-reduce mul by certain constants.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1146803002: Subzero: Strength-reduce mul by certain constants. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Remove a TODO Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 1269 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1280 // multiple of the required alignment at runtime.	1280 // multiple of the required alignment at runtime.

1281 Variable *T = makeReg(IceType_i32);	1281 Variable *T = makeReg(IceType_i32);

1282 _mov(T, TotalSize);	1282 _mov(T, TotalSize);

1283 _add(T, Ctx->getConstantInt32(Alignment - 1));	1283 _add(T, Ctx->getConstantInt32(Alignment - 1));

1284 _and(T, Ctx->getConstantInt32(-Alignment));	1284 _and(T, Ctx->getConstantInt32(-Alignment));

1285 _sub(esp, T);	1285 _sub(esp, T);

1286 }	1286 }

1287 _mov(Dest, esp);	1287 _mov(Dest, esp);

1288 }	1288 }

1289	1289

	1290 // Strength-reduce scalar integer multiplication by a constant (for

	1291 // i32 or narrower) for certain constants. The lea instruction can be

	1292 // used to multiply by 3, 5, or 9, and the shl instruction can be used

	1293 // to multiply by powers of 2. These can be combined such that

	1294 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,

	1295 // combined with left-shifting by 2.

	1296 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,

	1297 int32_t Src1) {

	1298 // Disable this optimization for Om1 and O0, just to keep things

	1299 // simple there.

	1300 if (Ctx->getFlags().getOptLevel() < Opt_1)

	1301 return false;

	1302 Type Ty = Dest->getType();

	1303 Variable *T = nullptr;

	1304 if (Src1 == -1) {

	1305 _mov(T, Src0);

	1306 _neg(T);

	1307 _mov(Dest, T);

	1308 return true;

	1309 }

	1310 if (Src1 == 0) {

	1311 _mov(Dest, Ctx->getConstantZero(Ty));

	1312 return true;

	1313 }

	1314 if (Src1 == 1) {

	1315 _mov(T, Src0);

	1316 _mov(Dest, T);

	1317 return true;

	1318 }

	1319 // Don't bother with the edge case where Src1 == MININT.

	1320 if (Src1 == -Src1)

	1321 return false;

	1322 const bool Src1IsNegative = Src1 < 0;

	1323 if (Src1IsNegative)

	1324 Src1 = -Src1;

	1325 uint32_t Count9 = 0;

	1326 uint32_t Count5 = 0;

	1327 uint32_t Count3 = 0;

	1328 uint32_t Count2 = 0;

	1329 uint32_t CountOps = 0;

	1330 while (Src1 > 1) {

	1331 if (Src1 % 9 == 0) {

	1332 ++CountOps;

	1333 ++Count9;

	1334 Src1 /= 9;

	1335 } else if (Src1 % 5 == 0) {

	1336 ++CountOps;

	1337 ++Count5;

	1338 Src1 /= 5;

	1339 } else if (Src1 % 3 == 0) {

	1340 ++CountOps;

	1341 ++Count3;

	1342 Src1 /= 3;

	1343 } else if (Src1 % 2 == 0) {

	1344 if (Count2 == 0)

	1345 ++CountOps;

	1346 ++Count2;

	1347 Src1 /= 2;

	1348 } else {

	1349 return false;

	1350 }

	1351 }

	1352 // Lea optimization only works for i16 and i32 types, not i8.

	1353 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))

	1354 return false;

	1355 // Limit the number of lea/shl operations for a single multiply, to

	1356 // a somewhat arbitrary choice of 3.

	1357 const uint32_t MaxOpsForOptimizedMul = 3;

	1358 if (CountOps > MaxOpsForOptimizedMul)

	1359 return false;

	1360 _mov(T, Src0);

	1361 Constant *Zero = Ctx->getConstantZero(IceType_i32);

	1362 for (uint32_t i = 0; i < Count9; ++i) {

	1363 const uint16_t Shift = 3; // log2(9-1)

	1364 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1365 _set_dest_nonkillable();

	1366 }

	1367 for (uint32_t i = 0; i < Count5; ++i) {

	1368 const uint16_t Shift = 2; // log2(5-1)

	1369 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1370 _set_dest_nonkillable();

	1371 }

	1372 for (uint32_t i = 0; i < Count3; ++i) {

	1373 const uint16_t Shift = 1; // log2(3-1)

	1374 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1375 _set_dest_nonkillable();

	1376 }

	1377 if (Count2) {

	1378 _shl(T, Ctx->getConstantInt(Ty, Count2));

	1379 }

	1380 if (Src1IsNegative)

	1381 _neg(T);

	1382 _mov(Dest, T);

	1383 return true;

	1384 }

	1385

1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {	1386 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

1291 Variable *Dest = Inst->getDest();	1387 Variable *Dest = Inst->getDest();

1292 Operand *Src0 = legalize(Inst->getSrc(0));	1388 Operand *Src0 = legalize(Inst->getSrc(0));

1293 Operand *Src1 = legalize(Inst->getSrc(1));	1389 Operand *Src1 = legalize(Inst->getSrc(1));

1294 if (Inst->isCommutative()) {	1390 if (Inst->isCommutative()) {

1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))	1391 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))

1296 std::swap(Src0, Src1);	1392 std::swap(Src0, Src1);

	1393 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))

	1394 std::swap(Src0, Src1);

1297 }	1395 }

1298 if (Dest->getType() == IceType_i64) {	1396 if (Dest->getType() == IceType_i64) {

1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	1397 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	1398 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

1301 Operand *Src0Lo = loOperand(Src0);	1399 Operand *Src0Lo = loOperand(Src0);

1302 Operand *Src0Hi = hiOperand(Src0);	1400 Operand *Src0Hi = hiOperand(Src0);

1303 Operand *Src1Lo = loOperand(Src1);	1401 Operand *Src1Lo = loOperand(Src1);

1304 Operand *Src1Hi = hiOperand(Src1);	1402 Operand *Src1Hi = hiOperand(Src1);

1305 Variable *T_Lo = nullptr, *T_Hi = nullptr;	1403 Variable *T_Lo = nullptr, *T_Hi = nullptr;

1306 switch (Inst->getOp()) {	1404 switch (Inst->getOp()) {

(...skipping 207 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1514 lowerCall(Call);	1612 lowerCall(Call);

1515 } break;	1613 } break;

1516 case InstArithmetic::Fadd:	1614 case InstArithmetic::Fadd:

1517 case InstArithmetic::Fsub:	1615 case InstArithmetic::Fsub:

1518 case InstArithmetic::Fmul:	1616 case InstArithmetic::Fmul:

1519 case InstArithmetic::Fdiv:	1617 case InstArithmetic::Fdiv:

1520 case InstArithmetic::Frem:	1618 case InstArithmetic::Frem:

1521 llvm_unreachable("FP instruction with i64 type");	1619 llvm_unreachable("FP instruction with i64 type");

1522 break;	1620 break;

1523 }	1621 }

1524 } else if (isVectorType(Dest->getType())) {	1622 return;

	1623 }

	1624 if (isVectorType(Dest->getType())) {

1525 // TODO: Trap on integer divide and integer modulo by zero.	1625 // TODO: Trap on integer divide and integer modulo by zero.

1526 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899	1626 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

1527 if (llvm::isa<OperandX8632Mem>(Src1))	1627 if (llvm::isa<OperandX8632Mem>(Src1))

1528 Src1 = legalizeToVar(Src1);	1628 Src1 = legalizeToVar(Src1);

1529 switch (Inst->getOp()) {	1629 switch (Inst->getOp()) {

1530 case InstArithmetic::_num:	1630 case InstArithmetic::_num:

1531 llvm_unreachable("Unknown arithmetic operator");	1631 llvm_unreachable("Unknown arithmetic operator");

1532 break;	1632 break;

1533 case InstArithmetic::Add: {	1633 case InstArithmetic::Add: {

1534 Variable *T = makeReg(Dest->getType());	1634 Variable *T = makeReg(Dest->getType());

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1643 case InstArithmetic::Fdiv: {	1743 case InstArithmetic::Fdiv: {

1644 Variable *T = makeReg(Dest->getType());	1744 Variable *T = makeReg(Dest->getType());

1645 _movp(T, Src0);	1745 _movp(T, Src0);

1646 _divps(T, Src1);	1746 _divps(T, Src1);

1647 _movp(Dest, T);	1747 _movp(Dest, T);

1648 } break;	1748 } break;

1649 case InstArithmetic::Frem:	1749 case InstArithmetic::Frem:

1650 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1750 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1651 break;	1751 break;

1652 }	1752 }

1653 } else { // Dest->getType() is non-i64 scalar	1753 return;

1654 Variable *T_edx = nullptr;	1754 }

1655 Variable *T = nullptr;	1755 Variable *T_edx = nullptr;

1656 switch (Inst->getOp()) {	1756 Variable *T = nullptr;

1657 case InstArithmetic::_num:	1757 switch (Inst->getOp()) {

1658 llvm_unreachable("Unknown arithmetic operator");	1758 case InstArithmetic::_num:

1659 break;	1759 llvm_unreachable("Unknown arithmetic operator");

1660 case InstArithmetic::Add:	1760 break;

	1761 case InstArithmetic::Add:

	1762 _mov(T, Src0);

	1763 _add(T, Src1);

	1764 _mov(Dest, T);

	1765 break;

	1766 case InstArithmetic::And:

	1767 _mov(T, Src0);

	1768 _and(T, Src1);

	1769 _mov(Dest, T);

	1770 break;

	1771 case InstArithmetic::Or:

	1772 _mov(T, Src0);

	1773 _or(T, Src1);

	1774 _mov(Dest, T);

	1775 break;

	1776 case InstArithmetic::Xor:

	1777 _mov(T, Src0);

	1778 _xor(T, Src1);

	1779 _mov(Dest, T);

	1780 break;

	1781 case InstArithmetic::Sub:

	1782 _mov(T, Src0);

	1783 _sub(T, Src1);

	1784 _mov(Dest, T);

	1785 break;

	1786 case InstArithmetic::Mul:

	1787 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

	1788 if (optimizeScalarMul(Dest, Src0, C->getValue()))

	1789 return;

	1790 }

	1791 // The 8-bit version of imul only allows the form "imul r/m8"

	1792 // where T must be in eax.

	1793 if (isByteSizedArithType(Dest->getType())) {

	1794 _mov(T, Src0, RegX8632::Reg_eax);

	1795 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

	1796 } else {

1661 _mov(T, Src0);	1797 _mov(T, Src0);

1662 _add(T, Src1);	1798 }

1663 _mov(Dest, T);	1799 _imul(T, Src1);

1664 break;	1800 _mov(Dest, T);

1665 case InstArithmetic::And:	1801 break;

1666 _mov(T, Src0);	1802 case InstArithmetic::Shl:

1667 _and(T, Src1);	1803 _mov(T, Src0);

1668 _mov(Dest, T);	1804 if (!llvm::isa<Constant>(Src1))

1669 break;	1805 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);

1670 case InstArithmetic::Or:	1806 _shl(T, Src1);

1671 _mov(T, Src0);	1807 _mov(Dest, T);

1672 _or(T, Src1);	1808 break;

1673 _mov(Dest, T);	1809 case InstArithmetic::Lshr:

1674 break;	1810 _mov(T, Src0);

1675 case InstArithmetic::Xor:	1811 if (!llvm::isa<Constant>(Src1))

1676 _mov(T, Src0);	1812 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);

1677 _xor(T, Src1);	1813 _shr(T, Src1);

1678 _mov(Dest, T);	1814 _mov(Dest, T);

1679 break;	1815 break;

1680 case InstArithmetic::Sub:	1816 case InstArithmetic::Ashr:

1681 _mov(T, Src0);	1817 _mov(T, Src0);

1682 _sub(T, Src1);	1818 if (!llvm::isa<Constant>(Src1))

1683 _mov(Dest, T);	1819 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);

1684 break;	1820 _sar(T, Src1);

1685 case InstArithmetic::Mul:	1821 _mov(Dest, T);

1686 // TODO: Optimize for llvm::isa<Constant>(Src1)	1822 break;

1687 // TODO: Strength-reduce multiplications by a constant,	1823 case InstArithmetic::Udiv:

1688 // particularly -1 and powers of 2. Advanced: use lea to	1824 // div and idiv are the few arithmetic operators that do not allow

1689 // multiply by 3, 5, 9.	1825 // immediates as the operand.

1690 //	1826 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1691 // The 8-bit version of imul only allows the form "imul r/m8"	1827 if (isByteSizedArithType(Dest->getType())) {

1692 // where T must be in eax.	1828 Variable *T_ah = nullptr;

1693 if (isByteSizedArithType(Dest->getType())) {	1829 Constant *Zero = Ctx->getConstantZero(IceType_i8);

1694 _mov(T, Src0, RegX8632::Reg_eax);	1830 _mov(T, Src0, RegX8632::Reg_eax);

1695 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1831 _mov(T_ah, Zero, RegX8632::Reg_ah);

1696 } else {	1832 _div(T, Src1, T_ah);

1697 _mov(T, Src0);	1833 _mov(Dest, T);

	1834 } else {

	1835 Constant *Zero = Ctx->getConstantZero(IceType_i32);

	1836 _mov(T, Src0, RegX8632::Reg_eax);

	1837 _mov(T_edx, Zero, RegX8632::Reg_edx);

	1838 _div(T, Src1, T_edx);

	1839 _mov(Dest, T);

	1840 }

	1841 break;

	1842 case InstArithmetic::Sdiv:

	1843 // TODO(stichnot): Enable this after doing better performance

	1844 // and cross testing.

	1845 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

	1846 // Optimize division by constant power of 2, but not for Om1

	1847 // or O0, just to keep things simple there.

	1848 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

	1849 int32_t Divisor = C->getValue();

	1850 uint32_t UDivisor = static_cast<uint32_t>(Divisor);

	1851 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

	1852 uint32_t LogDiv = llvm::Log2_32(UDivisor);

	1853 Type Ty = Dest->getType();

	1854 // LLVM does the following for dest=src/(1<<log):

	1855 // t=src

	1856 // sar t,typewidth-1 // -1 if src is negative, 0 if not

	1857 // shr t,typewidth-log

	1858 // add t,src

	1859 // sar t,log

	1860 // dest=t

	1861 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);

	1862 _mov(T, Src0);

	1863 // If for some reason we are dividing by 1, just treat it

	1864 // like an assignment.

	1865 if (LogDiv > 0) {

	1866 // The initial sar is unnecessary when dividing by 2.

	1867 if (LogDiv > 1)

	1868 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

	1869 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

	1870 _add(T, Src0);

	1871 _sar(T, Ctx->getConstantInt(Ty, LogDiv));

	1872 }

	1873 _mov(Dest, T);

	1874 return;

	1875 }

1698 }	1876 }

1699 _imul(T, Src1);	1877 }

1700 _mov(Dest, T);	1878 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1701 break;	1879 if (isByteSizedArithType(Dest->getType())) {

1702 case InstArithmetic::Shl:	1880 _mov(T, Src0, RegX8632::Reg_eax);

1703 _mov(T, Src0);	1881 _cbwdq(T, T);

1704 if (!llvm::isa<Constant>(Src1))	1882 _idiv(T, Src1, T);

1705 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);	1883 _mov(Dest, T);

1706 _shl(T, Src1);	1884 } else {

1707 _mov(Dest, T);	1885 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

1708 break;	1886 _mov(T, Src0, RegX8632::Reg_eax);

1709 case InstArithmetic::Lshr:	1887 _cbwdq(T_edx, T);

1710 _mov(T, Src0);	1888 _idiv(T, Src1, T_edx);

1711 if (!llvm::isa<Constant>(Src1))	1889 _mov(Dest, T);

1712 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);	1890 }

1713 _shr(T, Src1);	1891 break;

1714 _mov(Dest, T);	1892 case InstArithmetic::Urem:

1715 break;	1893 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1716 case InstArithmetic::Ashr:	1894 if (isByteSizedArithType(Dest->getType())) {

1717 _mov(T, Src0);	1895 Variable *T_ah = nullptr;

1718 if (!llvm::isa<Constant>(Src1))	1896 Constant *Zero = Ctx->getConstantZero(IceType_i8);

1719 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);	1897 _mov(T, Src0, RegX8632::Reg_eax);

1720 _sar(T, Src1);	1898 _mov(T_ah, Zero, RegX8632::Reg_ah);

1721 _mov(Dest, T);	1899 _div(T_ah, Src1, T);

1722 break;	1900 _mov(Dest, T_ah);

1723 case InstArithmetic::Udiv:	1901 } else {

1724 // div and idiv are the few arithmetic operators that do not allow	1902 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1725 // immediates as the operand.	1903 _mov(T_edx, Zero, RegX8632::Reg_edx);

1726 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1904 _mov(T, Src0, RegX8632::Reg_eax);

1727 if (isByteSizedArithType(Dest->getType())) {	1905 _div(T_edx, Src1, T);

1728 Variable *T_ah = nullptr;	1906 _mov(Dest, T_edx);

1729 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1907 }

1730 _mov(T, Src0, RegX8632::Reg_eax);	1908 break;

1731 _mov(T_ah, Zero, RegX8632::Reg_ah);	1909 case InstArithmetic::Srem:

1732 _div(T, Src1, T_ah);	1910 // TODO(stichnot): Enable this after doing better performance

1733 _mov(Dest, T);	1911 // and cross testing.

1734 } else {	1912 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

1735 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1913 // Optimize mod by constant power of 2, but not for Om1 or O0,

1736 _mov(T, Src0, RegX8632::Reg_eax);	1914 // just to keep things simple there.

1737 _mov(T_edx, Zero, RegX8632::Reg_edx);	1915 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

1738 _div(T, Src1, T_edx);	1916 int32_t Divisor = C->getValue();

1739 _mov(Dest, T);	1917 uint32_t UDivisor = static_cast<uint32_t>(Divisor);

	1918 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

	1919 uint32_t LogDiv = llvm::Log2_32(UDivisor);

	1920 Type Ty = Dest->getType();

	1921 // LLVM does the following for dest=src%(1<<log):

	1922 // t=src

	1923 // sar t,typewidth-1 // -1 if src is negative, 0 if not

	1924 // shr t,typewidth-log

	1925 // add t,src

	1926 // and t, -(1<<log)

	1927 // sub t,src

	1928 // neg t

	1929 // dest=t

	1930 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);

	1931 // If for some reason we are dividing by 1, just assign 0.

	1932 if (LogDiv == 0) {

	1933 _mov(Dest, Ctx->getConstantZero(Ty));

	1934 return;

	1935 }

	1936 _mov(T, Src0);

	1937 // The initial sar is unnecessary when dividing by 2.

	1938 if (LogDiv > 1)

	1939 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

	1940 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

	1941 _add(T, Src0);

	1942 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

	1943 _sub(T, Src0);

	1944 _neg(T);

	1945 _mov(Dest, T);

	1946 return;

	1947 }

1740 }	1948 }

1741 break;	1949 }

1742 case InstArithmetic::Sdiv:	1950 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1743 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1951 if (isByteSizedArithType(Dest->getType())) {

1744 if (isByteSizedArithType(Dest->getType())) {	1952 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);

1745 _mov(T, Src0, RegX8632::Reg_eax);	1953 _mov(T, Src0, RegX8632::Reg_eax);

1746 _cbwdq(T, T);	1954 _cbwdq(T, T);

1747 _idiv(T, Src1, T);	1955 Context.insert(InstFakeDef::create(Func, T_ah));

1748 _mov(Dest, T);	1956 _idiv(T_ah, Src1, T);

1749 } else {	1957 _mov(Dest, T_ah);

1750 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);	1958 } else {

1751 _mov(T, Src0, RegX8632::Reg_eax);	1959 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

1752 _cbwdq(T_edx, T);	1960 _mov(T, Src0, RegX8632::Reg_eax);

1753 _idiv(T, Src1, T_edx);	1961 _cbwdq(T_edx, T);

1754 _mov(Dest, T);	1962 _idiv(T_edx, Src1, T);

1755 }	1963 _mov(Dest, T_edx);

1756 break;	1964 }

1757 case InstArithmetic::Urem:	1965 break;

1758 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1966 case InstArithmetic::Fadd:

1759 if (isByteSizedArithType(Dest->getType())) {	1967 _mov(T, Src0);

1760 Variable *T_ah = nullptr;	1968 _addss(T, Src1);

1761 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1969 _mov(Dest, T);

1762 _mov(T, Src0, RegX8632::Reg_eax);	1970 break;

1763 _mov(T_ah, Zero, RegX8632::Reg_ah);	1971 case InstArithmetic::Fsub:

1764 _div(T_ah, Src1, T);	1972 _mov(T, Src0);

1765 _mov(Dest, T_ah);	1973 _subss(T, Src1);

1766 } else {	1974 _mov(Dest, T);

1767 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1975 break;

1768 _mov(T_edx, Zero, RegX8632::Reg_edx);	1976 case InstArithmetic::Fmul:

1769 _mov(T, Src0, RegX8632::Reg_eax);	1977 _mov(T, Src0);

1770 _div(T_edx, Src1, T);	1978 _mulss(T, Src1);

1771 _mov(Dest, T_edx);	1979 _mov(Dest, T);

1772 }	1980 break;

1773 break;	1981 case InstArithmetic::Fdiv:

1774 case InstArithmetic::Srem:	1982 _mov(T, Src0);

1775 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1983 _divss(T, Src1);

1776 if (isByteSizedArithType(Dest->getType())) {	1984 _mov(Dest, T);

1777 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);	1985 break;

1778 _mov(T, Src0, RegX8632::Reg_eax);	1986 case InstArithmetic::Frem: {

1779 _cbwdq(T, T);	1987 const SizeT MaxSrcs = 2;

1780 Context.insert(InstFakeDef::create(Func, T_ah));	1988 Type Ty = Dest->getType();

1781 _idiv(T_ah, Src1, T);	1989 InstCall *Call = makeHelperCall(

1782 _mov(Dest, T_ah);	1990 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);

1783 } else {	1991 Call->addArg(Src0);

1784 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);	1992 Call->addArg(Src1);

1785 _mov(T, Src0, RegX8632::Reg_eax);	1993 return lowerCall(Call);

1786 _cbwdq(T_edx, T);	1994 }

1787 _idiv(T_edx, Src1, T);

1788 _mov(Dest, T_edx);

1789 }

1790 break;

1791 case InstArithmetic::Fadd:

1792 _mov(T, Src0);

1793 _addss(T, Src1);

1794 _mov(Dest, T);

1795 break;

1796 case InstArithmetic::Fsub:

1797 _mov(T, Src0);

1798 _subss(T, Src1);

1799 _mov(Dest, T);

1800 break;

1801 case InstArithmetic::Fmul:

1802 _mov(T, Src0);

1803 _mulss(T, Src1);

1804 _mov(Dest, T);

1805 break;

1806 case InstArithmetic::Fdiv:

1807 _mov(T, Src0);

1808 _divss(T, Src1);

1809 _mov(Dest, T);

1810 break;

1811 case InstArithmetic::Frem: {

1812 const SizeT MaxSrcs = 2;

1813 Type Ty = Dest->getType();

1814 InstCall *Call =

1815 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,

1816 Dest, MaxSrcs);

1817 Call->addArg(Src0);

1818 Call->addArg(Src1);

1819 return lowerCall(Call);

1820 } break;

1821 }

1822 }	1995 }

1823 }	1996 }

1824	1997

1825 void TargetX8632::lowerAssign(const InstAssign *Inst) {	1998 void TargetX8632::lowerAssign(const InstAssign *Inst) {

1826 Variable *Dest = Inst->getDest();	1999 Variable *Dest = Inst->getDest();

1827 Operand *Src0 = Inst->getSrc(0);	2000 Operand *Src0 = Inst->getSrc(0);

1828 assert(Dest->getType() == Src0->getType());	2001 assert(Dest->getType() == Src0->getType());

1829 if (Dest->getType() == IceType_i64) {	2002 if (Dest->getType() == IceType_i64) {

1830 Src0 = legalize(Src0);	2003 Src0 = legalize(Src0);

1831 Operand *Src0Lo = loOperand(Src0);	2004 Operand *Src0Lo = loOperand(Src0);

(...skipping 1280 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3112 Context.insert(	3285 Context.insert(

3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));	3286 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));

3114 return;	3287 return;

3115 }	3288 }

3116 case Intrinsics::AtomicRMW:	3289 case Intrinsics::AtomicRMW:

3117 if (!Intrinsics::isMemoryOrderValid(	3290 if (!Intrinsics::isMemoryOrderValid(

3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) {	3291 ID, getConstantMemoryOrder(Instr->getArg(3)))) {

3119 Func->setError("Unexpected memory ordering for AtomicRMW");	3292 Func->setError("Unexpected memory ordering for AtomicRMW");

3120 return;	3293 return;

3121 }	3294 }

3122 lowerAtomicRMW(Instr->getDest(),	3295 lowerAtomicRMW(

3123 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(	3296 Instr->getDest(),

3124 Instr->getArg(0))->getValue()),	3297 static_cast<uint32_t>(

3125 Instr->getArg(1), Instr->getArg(2));	3298 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),

	3299 Instr->getArg(1), Instr->getArg(2));

3126 return;	3300 return;

3127 case Intrinsics::AtomicStore: {	3301 case Intrinsics::AtomicStore: {

3128 if (!Intrinsics::isMemoryOrderValid(	3302 if (!Intrinsics::isMemoryOrderValid(

3129 ID, getConstantMemoryOrder(Instr->getArg(2)))) {	3303 ID, getConstantMemoryOrder(Instr->getArg(2)))) {

3130 Func->setError("Unexpected memory ordering for AtomicStore");	3304 Func->setError("Unexpected memory ordering for AtomicStore");

3131 return;	3305 return;

3132 }	3306 }

3133 // We require the memory address to be naturally aligned.	3307 // We require the memory address to be naturally aligned.

3134 // Given that is the case, then normal stores are atomic.	3308 // Given that is the case, then normal stores are atomic.

3135 // Add a fence after the store to make it visible.	3309 // Add a fence after the store to make it visible.

(...skipping 1877 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5013 emitConstantPool<PoolTypeConverter<float>>(Ctx);	5187 emitConstantPool<PoolTypeConverter<float>>(Ctx);

5014 emitConstantPool<PoolTypeConverter<double>>(Ctx);	5188 emitConstantPool<PoolTypeConverter<double>>(Ctx);

5015 } break;	5189 } break;

5016 }	5190 }

5017 }	5191 }

5018	5192

5019 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)	5193 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)

5020 : TargetHeaderLowering(Ctx) {}	5194 : TargetHeaderLowering(Ctx) {}

5021	5195

5022 } // end of namespace Ice	5196 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/assembler/x86/immediate_encodings.ll » ('j') | no next file with comments »