src/IceTargetLoweringX8632.cpp - Issue 1146803002: Subzero: Strength-reduce mul by certain constants.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1146803002: Subzero: Strength-reduce mul by certain constants. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Fill out the cross test. Cleanup. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 1269 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1280 // multiple of the required alignment at runtime.	1280 // multiple of the required alignment at runtime.

1281 Variable *T = makeReg(IceType_i32);	1281 Variable *T = makeReg(IceType_i32);

1282 _mov(T, TotalSize);	1282 _mov(T, TotalSize);

1283 _add(T, Ctx->getConstantInt32(Alignment - 1));	1283 _add(T, Ctx->getConstantInt32(Alignment - 1));

1284 _and(T, Ctx->getConstantInt32(-Alignment));	1284 _and(T, Ctx->getConstantInt32(-Alignment));

1285 _sub(esp, T);	1285 _sub(esp, T);

1286 }	1286 }

1287 _mov(Dest, esp);	1287 _mov(Dest, esp);

1288 }	1288 }

1289	1289

	1290 // Strength-reduce scalar integer multiplication by a constant (for

	1291 // i32 or narrower) for certain constants. The lea instruction can be

	1292 // used to multiply by 3, 5, or 9, and the lsh instruction can be used

	1293 // to multiply by powers of 2. These can be combined such that

	1294 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,

	1295 // combined with left-shifting by 2.

	1296 bool TargetX8632::optimizeScalarMul(Variable Dest, Operand Src0,

	1297 int32_t Src1) {

	1298 // Disable this optimization for Om1 and O0, just to keep things

	1299 // simple there.

	1300 if (Ctx->getFlags().getOptLevel() < Opt_1)

	1301 return false;

	1302 Variable *T = nullptr;

	1303 if (Src1 == -1) {

	1304 _mov(T, Src0);

	1305 _neg(T);

	1306 _mov(Dest, T);

	1307 return true;

	1308 }

	1309 if (Src1 == 1) {

	1310 _mov(T, Src0);

	1311 _mov(Dest, T);

	1312 return true;

	1313 }

	1314 if (Src1 <= 0)
	John 2015/06/10 17:27:52 Just throwing an idea... would it work to invert S Just throwing an idea... would it work to invert Src1 and then _inv(T) at the end? Then this function would become something like if (Src1 == 0) { _mov(T, Zero); _mov(Dest, Zero); return true; } const bool NegativeSrc1 = Src1 < 0; if (NegativeSrc1) { Src1 = -Src1; } uint32_t Count9 = 0; uint32_t Count5 = 0; uint32_t Count3 = 0; uint32_t Count2 = 0; uint32_t CountOps = NegativeSrc1 ? 1 : 0; while (Src1 > 1) { ... } const uint32_t MaxOpsForOptimizedMul = 5; if (CountOps > MaxOpsForOptimizedMul) { return false; } _mov(T, Src0); ... if (NegativeSrc1) { _inv(T); } _mov(Dest, T); return true; Jim Stichnoth 2015/06/12 17:31:47 Done. Though I suppress that when the constant is Show quoted text On 2015/06/10 17:27:52, John wrote: > Just throwing an idea... would it work to invert Src1 and then _inv(T) at the > end? Then this function would become something like > > if (Src1 == 0) { > _mov(T, Zero); > _mov(Dest, Zero); > return true; > } > > const bool NegativeSrc1 = Src1 < 0; > > if (NegativeSrc1) { > Src1 = -Src1; > } > > uint32_t Count9 = 0; > uint32_t Count5 = 0; > uint32_t Count3 = 0; > uint32_t Count2 = 0; > uint32_t CountOps = NegativeSrc1 ? 1 : 0; > > while (Src1 > 1) { > ... > } > > const uint32_t MaxOpsForOptimizedMul = 5; > if (CountOps > MaxOpsForOptimizedMul) { > return false; > } > > _mov(T, Src0); > ... > if (NegativeSrc1) { > _inv(T); > } > _mov(Dest, T); > return true; Done. Though I suppress that when the constant is MININT.
	1315 return false;

	1316 uint32_t Count9 = 0;

	1317 uint32_t Count5 = 0;

	1318 uint32_t Count3 = 0;

	1319 uint32_t Count2 = 0;

	1320 uint32_t CountOps = 0;

	1321 while (Src1 > 1) {

	1322 if (Src1 % 9 == 0) {

	1323 ++CountOps;

	1324 ++Count9;

	1325 Src1 /= 9;

	1326 } else if (Src1 % 5 == 0) {

	1327 ++CountOps;

	1328 ++Count5;

	1329 Src1 /= 5;

	1330 } else if (Src1 % 3 == 0) {

	1331 ++CountOps;

	1332 ++Count3;

	1333 Src1 /= 3;

	1334 } else if (Src1 % 2 == 0) {

	1335 if (Count2 == 0)

	1336 ++CountOps;

	1337 ++Count2;

	1338 Src1 /= 2;

	1339 } else {

	1340 return false;

	1341 }

	1342 }

	1343 // Limit the number of lea/shl operations for a single multiply, to

	1344 // a somewhat arbitrary choice of 5.

	1345 const uint32_t MaxOpsForOptimizedMul = 5;
	jvoung (off chromium) 2015/06/10 18:12:27 FWIW, some of the agner tables say mul r/i is abou FWIW, some of the agner tables say mul r/i is about 3 microops for 32-bit (sometimes better for 8-bit, sometimes worse for 64-bit), so 5 might be high? I'm not sure how many ops LLVM/gcc will go up to. Anyway, could tune more later. John 2015/06/10 23:47:19 This little command echo "int v(int i) { return i Show quoted text On 2015/06/10 18:12:27, jvoung wrote: > FWIW, some of the agner tables say mul r/i is about 3 microops for 32-bit > (sometimes better for 8-bit, sometimes worse for 64-bit), so 5 might be high? > I'm not sure how many ops LLVM/gcc will go up to. > > Anyway, could tune more later. This little command echo "int v(int i) { return i * 25; }"\|gcc -S -O99 -x c - -o-\|less showed me a couple of interesting things. It seems gcc will not go over 2 instructions (and maybe a neg at the end for negative numbers.) Jim Stichnoth 2015/06/12 17:31:47 OK. I reduced the threshold to 3. Show quoted text On 2015/06/10 18:12:27, jvoung wrote: > FWIW, some of the agner tables say mul r/i is about 3 microops for 32-bit > (sometimes better for 8-bit, sometimes worse for 64-bit), so 5 might be high? > I'm not sure how many ops LLVM/gcc will go up to. > > Anyway, could tune more later. OK. I reduced the threshold to 3.
	1346 if (CountOps > MaxOpsForOptimizedMul)

	1347 return false;

	1348 _mov(T, Src0);

	1349 Constant *Zero = Ctx->getConstantZero(IceType_i32);

	1350 for (uint32_t i = 0; i < Count9; ++i) {

	1351 const uint16_t Shift = 3; // log2(9-1)

	1352 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1353 _set_dest_nonkillable();

	1354 }

	1355 for (uint32_t i = 0; i < Count5; ++i) {

	1356 const uint16_t Shift = 2; // log2(5-1)

	1357 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1358 _set_dest_nonkillable();

	1359 }

	1360 for (uint32_t i = 0; i < Count3; ++i) {

	1361 const uint16_t Shift = 1; // log2(3-1)

	1362 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));

	1363 _set_dest_nonkillable();

	1364 }

	1365 if (Count2) {

	1366 _shl(T, Ctx->getConstantInt(Dest->getType(), Count2));

	1367 }

	1368 _mov(Dest, T);

	1369 return true;

	1370 }

	1371

1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {	1372 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

1291 Variable *Dest = Inst->getDest();	1373 Variable *Dest = Inst->getDest();

1292 Operand *Src0 = legalize(Inst->getSrc(0));	1374 Operand *Src0 = legalize(Inst->getSrc(0));

1293 Operand *Src1 = legalize(Inst->getSrc(1));	1375 Operand *Src1 = legalize(Inst->getSrc(1));

1294 if (Inst->isCommutative()) {	1376 if (Inst->isCommutative()) {

1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))	1377 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))

1296 std::swap(Src0, Src1);	1378 std::swap(Src0, Src1);

	1379 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))

	1380 std::swap(Src0, Src1);

1297 }	1381 }

1298 if (Dest->getType() == IceType_i64) {	1382 if (Dest->getType() == IceType_i64) {

1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	1383 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	1384 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

1301 Operand *Src0Lo = loOperand(Src0);	1385 Operand *Src0Lo = loOperand(Src0);

1302 Operand *Src0Hi = hiOperand(Src0);	1386 Operand *Src0Hi = hiOperand(Src0);

1303 Operand *Src1Lo = loOperand(Src1);	1387 Operand *Src1Lo = loOperand(Src1);

1304 Operand *Src1Hi = hiOperand(Src1);	1388 Operand *Src1Hi = hiOperand(Src1);

1305 Variable T_Lo = nullptr, T_Hi = nullptr;	1389 Variable T_Lo = nullptr, T_Hi = nullptr;

1306 switch (Inst->getOp()) {	1390 switch (Inst->getOp()) {

(...skipping 207 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1514 lowerCall(Call);	1598 lowerCall(Call);

1515 } break;	1599 } break;

1516 case InstArithmetic::Fadd:	1600 case InstArithmetic::Fadd:

1517 case InstArithmetic::Fsub:	1601 case InstArithmetic::Fsub:

1518 case InstArithmetic::Fmul:	1602 case InstArithmetic::Fmul:

1519 case InstArithmetic::Fdiv:	1603 case InstArithmetic::Fdiv:

1520 case InstArithmetic::Frem:	1604 case InstArithmetic::Frem:

1521 llvm_unreachable("FP instruction with i64 type");	1605 llvm_unreachable("FP instruction with i64 type");

1522 break;	1606 break;

1523 }	1607 }

1524 } else if (isVectorType(Dest->getType())) {	1608 return;

	1609 }

	1610 if (isVectorType(Dest->getType())) {

1525 // TODO: Trap on integer divide and integer modulo by zero.	1611 // TODO: Trap on integer divide and integer modulo by zero.

1526 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899	1612 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

1527 if (llvm::isa<OperandX8632Mem>(Src1))	1613 if (llvm::isa<OperandX8632Mem>(Src1))

1528 Src1 = legalizeToVar(Src1);	1614 Src1 = legalizeToVar(Src1);

1529 switch (Inst->getOp()) {	1615 switch (Inst->getOp()) {

1530 case InstArithmetic::_num:	1616 case InstArithmetic::_num:

1531 llvm_unreachable("Unknown arithmetic operator");	1617 llvm_unreachable("Unknown arithmetic operator");

1532 break;	1618 break;

1533 case InstArithmetic::Add: {	1619 case InstArithmetic::Add: {

1534 Variable *T = makeReg(Dest->getType());	1620 Variable *T = makeReg(Dest->getType());

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1643 case InstArithmetic::Fdiv: {	1729 case InstArithmetic::Fdiv: {

1644 Variable *T = makeReg(Dest->getType());	1730 Variable *T = makeReg(Dest->getType());

1645 _movp(T, Src0);	1731 _movp(T, Src0);

1646 _divps(T, Src1);	1732 _divps(T, Src1);

1647 _movp(Dest, T);	1733 _movp(Dest, T);

1648 } break;	1734 } break;

1649 case InstArithmetic::Frem:	1735 case InstArithmetic::Frem:

1650 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1736 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1651 break;	1737 break;

1652 }	1738 }

1653 } else { // Dest->getType() is non-i64 scalar	1739 return;

	1740 }

	1741 { // TODO(stichnot): Remove this level of {}. (It's still there to

	1742 // make the diffs easier to read.)

1654 Variable *T_edx = nullptr;	1743 Variable *T_edx = nullptr;

1655 Variable *T = nullptr;	1744 Variable *T = nullptr;

1656 switch (Inst->getOp()) {	1745 switch (Inst->getOp()) {

1657 case InstArithmetic::_num:	1746 case InstArithmetic::_num:

1658 llvm_unreachable("Unknown arithmetic operator");	1747 llvm_unreachable("Unknown arithmetic operator");

1659 break;	1748 break;

1660 case InstArithmetic::Add:	1749 case InstArithmetic::Add:

1661 _mov(T, Src0);	1750 _mov(T, Src0);

1662 _add(T, Src1);	1751 _add(T, Src1);

1663 _mov(Dest, T);	1752 _mov(Dest, T);

(...skipping 12 matching lines...) Expand all Loading...
1676 _mov(T, Src0);	1765 _mov(T, Src0);

1677 _xor(T, Src1);	1766 _xor(T, Src1);

1678 _mov(Dest, T);	1767 _mov(Dest, T);

1679 break;	1768 break;

1680 case InstArithmetic::Sub:	1769 case InstArithmetic::Sub:

1681 _mov(T, Src0);	1770 _mov(T, Src0);

1682 _sub(T, Src1);	1771 _sub(T, Src1);

1683 _mov(Dest, T);	1772 _mov(Dest, T);

1684 break;	1773 break;

1685 case InstArithmetic::Mul:	1774 case InstArithmetic::Mul:

1686 // TODO: Optimize for llvm::isa<Constant>(Src1)	1775 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

1687 // TODO: Strength-reduce multiplications by a constant,	1776 if (optimizeScalarMul(Dest, Src0, C->getValue()))

1688 // particularly -1 and powers of 2. Advanced: use lea to	1777 return;

1689 // multiply by 3, 5, 9.	1778 }

1690 //

1691 // The 8-bit version of imul only allows the form "imul r/m8"	1779 // The 8-bit version of imul only allows the form "imul r/m8"

1692 // where T must be in eax.	1780 // where T must be in eax.

1693 if (isByteSizedArithType(Dest->getType())) {	1781 if (isByteSizedArithType(Dest->getType())) {

1694 _mov(T, Src0, RegX8632::Reg_eax);	1782 _mov(T, Src0, RegX8632::Reg_eax);

1695 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1783 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1696 } else {	1784 } else {

1697 _mov(T, Src0);	1785 _mov(T, Src0);

1698 }	1786 }

1699 _imul(T, Src1);	1787 _imul(T, Src1);

1700 _mov(Dest, T);	1788 _mov(Dest, T);

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1733 _mov(Dest, T);	1821 _mov(Dest, T);

1734 } else {	1822 } else {

1735 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1823 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1736 _mov(T, Src0, RegX8632::Reg_eax);	1824 _mov(T, Src0, RegX8632::Reg_eax);

1737 _mov(T_edx, Zero, RegX8632::Reg_edx);	1825 _mov(T_edx, Zero, RegX8632::Reg_edx);

1738 _div(T, Src1, T_edx);	1826 _div(T, Src1, T_edx);

1739 _mov(Dest, T);	1827 _mov(Dest, T);

1740 }	1828 }

1741 break;	1829 break;

1742 case InstArithmetic::Sdiv:	1830 case InstArithmetic::Sdiv:

	1831 // TODO(stichnot): Enable this after doing better performance

	1832 // and cross testing.

	1833 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

	1834 // Optimize division by constant power of 2, but not for Om1

	1835 // or O0, just to keep things simple there.

	1836 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

	1837 int32_t Divisor = C->getValue();

	1838 uint32_t UDivisor = static_cast<uint32_t>(Divisor);

	1839 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

	1840 uint32_t LogDiv = llvm::Log2_32(UDivisor);

	1841 Type Ty = Dest->getType();

	1842 // LLVM does the following for dest=src/(1<<log):

	1843 // t=src

	1844 // sar t,typewidth-1 // -1 if src is negative, 0 if not

	1845 // shr t,typewidth-log

	1846 // add t,src

	1847 // sar t,log

	1848 // dest=t

	1849 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);

	1850 _mov(T, Src0);

	1851 // If for some reason we are dividing by 1, just treat it

	1852 // like an assignment.

	1853 if (LogDiv > 0) {

	1854 // The initial sar is unnecessary when dividing by 2.

	1855 if (LogDiv > 1)

	1856 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

	1857 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

	1858 _add(T, Src0);

	1859 _sar(T, Ctx->getConstantInt(Ty, LogDiv));

	1860 }

	1861 _mov(Dest, T);

	1862 return;

	1863 }

	1864 }

	1865 }

1743 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1866 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1744 if (isByteSizedArithType(Dest->getType())) {	1867 if (isByteSizedArithType(Dest->getType())) {

1745 _mov(T, Src0, RegX8632::Reg_eax);	1868 _mov(T, Src0, RegX8632::Reg_eax);

1746 _cbwdq(T, T);	1869 _cbwdq(T, T);

1747 _idiv(T, Src1, T);	1870 _idiv(T, Src1, T);

1748 _mov(Dest, T);	1871 _mov(Dest, T);

1749 } else {	1872 } else {

1750 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);	1873 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

1751 _mov(T, Src0, RegX8632::Reg_eax);	1874 _mov(T, Src0, RegX8632::Reg_eax);

1752 _cbwdq(T_edx, T);	1875 _cbwdq(T_edx, T);

(...skipping 12 matching lines...) Expand all Loading...
1765 _mov(Dest, T_ah);	1888 _mov(Dest, T_ah);

1766 } else {	1889 } else {

1767 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1890 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1768 _mov(T_edx, Zero, RegX8632::Reg_edx);	1891 _mov(T_edx, Zero, RegX8632::Reg_edx);

1769 _mov(T, Src0, RegX8632::Reg_eax);	1892 _mov(T, Src0, RegX8632::Reg_eax);

1770 _div(T_edx, Src1, T);	1893 _div(T_edx, Src1, T);

1771 _mov(Dest, T_edx);	1894 _mov(Dest, T_edx);

1772 }	1895 }

1773 break;	1896 break;

1774 case InstArithmetic::Srem:	1897 case InstArithmetic::Srem:

	1898 // TODO(stichnot): Enable this after doing better performance

	1899 // and cross testing.

	1900 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {

	1901 // Optimize mod by constant power of 2, but not for Om1 or O0,

	1902 // just to keep things simple there.

	1903 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {

	1904 int32_t Divisor = C->getValue();

	1905 uint32_t UDivisor = static_cast<uint32_t>(Divisor);

	1906 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

	1907 uint32_t LogDiv = llvm::Log2_32(UDivisor);

	1908 Type Ty = Dest->getType();

	1909 // LLVM does the following for dest=src%(1<<log):

	1910 // t=src

	1911 // sar t,typewidth-1 // -1 if src is negative, 0 if not

	1912 // shr t,typewidth-log

	1913 // add t,src

	1914 // and t, -(1<<log)

	1915 // sub t,src

	1916 // neg t

	1917 // dest=t

	1918 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);

	1919 // If for some reason we are dividing by 1, just assign 0.

	1920 if (LogDiv == 0) {

	1921 _mov(Dest, Ctx->getConstantZero(Ty));

	1922 return;

	1923 }

	1924 _mov(T, Src0);

	1925 // The initial sar is unnecessary when dividing by 2.

	1926 if (LogDiv > 1)

	1927 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

	1928 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

	1929 _add(T, Src0);

	1930 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

	1931 _sub(T, Src0);

	1932 _neg(T);

	1933 _mov(Dest, T);

	1934 return;

	1935 }

	1936 }

	1937 }

1775 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);	1938 Src1 = legalize(Src1, Legal_Reg \| Legal_Mem);

1776 if (isByteSizedArithType(Dest->getType())) {	1939 if (isByteSizedArithType(Dest->getType())) {

1777 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);	1940 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);

1778 _mov(T, Src0, RegX8632::Reg_eax);	1941 _mov(T, Src0, RegX8632::Reg_eax);

1779 _cbwdq(T, T);	1942 _cbwdq(T, T);

1780 Context.insert(InstFakeDef::create(Func, T_ah));	1943 Context.insert(InstFakeDef::create(Func, T_ah));

1781 _idiv(T_ah, Src1, T);	1944 _idiv(T_ah, Src1, T);

1782 _mov(Dest, T_ah);	1945 _mov(Dest, T_ah);

1783 } else {	1946 } else {

1784 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);	1947 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

(...skipping 25 matching lines...) Expand all Loading...
1810 break;	1973 break;

1811 case InstArithmetic::Frem: {	1974 case InstArithmetic::Frem: {

1812 const SizeT MaxSrcs = 2;	1975 const SizeT MaxSrcs = 2;

1813 Type Ty = Dest->getType();	1976 Type Ty = Dest->getType();

1814 InstCall *Call =	1977 InstCall *Call =

1815 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,	1978 makeHelperCall(isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64,

1816 Dest, MaxSrcs);	1979 Dest, MaxSrcs);

1817 Call->addArg(Src0);	1980 Call->addArg(Src0);

1818 Call->addArg(Src1);	1981 Call->addArg(Src1);

1819 return lowerCall(Call);	1982 return lowerCall(Call);

1820 } break;	1983 }

1821 }	1984 }

1822 }	1985 }

1823 }	1986 }

1824	1987

1825 void TargetX8632::lowerAssign(const InstAssign *Inst) {	1988 void TargetX8632::lowerAssign(const InstAssign *Inst) {

1826 Variable *Dest = Inst->getDest();	1989 Variable *Dest = Inst->getDest();

1827 Operand *Src0 = Inst->getSrc(0);	1990 Operand *Src0 = Inst->getSrc(0);

1828 assert(Dest->getType() == Src0->getType());	1991 assert(Dest->getType() == Src0->getType());

1829 if (Dest->getType() == IceType_i64) {	1992 if (Dest->getType() == IceType_i64) {

1830 Src0 = legalize(Src0);	1993 Src0 = legalize(Src0);

(...skipping 3180 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5011 case FT_Asm:	5174 case FT_Asm:

5012 case FT_Iasm: {	5175 case FT_Iasm: {

5013 OstreamLocker L(Ctx);	5176 OstreamLocker L(Ctx);

5014 emitConstantPool<PoolTypeConverter<float>>(Ctx);	5177 emitConstantPool<PoolTypeConverter<float>>(Ctx);

5015 emitConstantPool<PoolTypeConverter<double>>(Ctx);	5178 emitConstantPool<PoolTypeConverter<double>>(Ctx);

5016 } break;	5179 } break;

5017 }	5180 }

5018 }	5181 }

5019	5182

5020 } // end of namespace Ice	5183 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/assembler/x86/immediate_encodings.ll » ('j') | no next file with comments »