Chromium Code Reviews

Issue 342763004: Add atomic load/store, fetch_add, fence, and is-lock-free lowering. (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 
| 2 // | 2 // | 
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator | 
| 4 // | 4 // | 
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source | 
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. | 
| 7 // | 7 // | 
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// | 
| 9 // | 9 // | 
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which | 
| (...skipping 413 matching lines...) | |
| 424 if (Arg->hasReg()) { | 424 if (Arg->hasReg()) { | 
| 425 assert(Ty != IceType_i64); | 425 assert(Ty != IceType_i64); | 
| 426 OperandX8632Mem *Mem = OperandX8632Mem::create( | 426 OperandX8632Mem *Mem = OperandX8632Mem::create( | 
| 427 Func, Ty, FramePtr, | 427 Func, Ty, FramePtr, | 
| 428 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 428 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 
| 429 _mov(Arg, Mem); | 429 _mov(Arg, Mem); | 
| 430 } | 430 } | 
| 431 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 431 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 
| 432 } | 432 } | 
| 433 | 433 | 
| 434 // static | |
| 435 Type TargetX8632::stackSlotType() { return IceType_i32; } | 434 Type TargetX8632::stackSlotType() { return IceType_i32; } | 
| 436 | 435 | 
| 437 void TargetX8632::addProlog(CfgNode *Node) { | 436 void TargetX8632::addProlog(CfgNode *Node) { | 
| 438 // If SimpleCoalescing is false, each variable without a register | 437 // If SimpleCoalescing is false, each variable without a register | 
| 439 // gets its own unique stack slot, which leads to large stack | 438 // gets its own unique stack slot, which leads to large stack | 
| 440 // frames. If SimpleCoalescing is true, then each "global" variable | 439 // frames. If SimpleCoalescing is true, then each "global" variable | 
| 441 // without a register gets its own slot, but "local" variable slots | 440 // without a register gets its own slot, but "local" variable slots | 
| 442 // are reused across basic blocks. E.g., if A and B are local to | 441 // are reused across basic blocks. E.g., if A and B are local to | 
| 443 // block 1 and C is local to block 2, then C may share a slot with A | 442 // block 1 and C is local to block 2, then C may share a slot with A | 
| 444 // or B. | 443 // or B. | 
| (...skipping 1163 matching lines...) | |
| 1608 assert(Src0RM->getType() == IceType_f64); | 1607 assert(Src0RM->getType() == IceType_f64); | 
| 1609 // a.i64 = bitcast b.f64 ==> | 1608 // a.i64 = bitcast b.f64 ==> | 
| 1610 // s.f64 = spill b.f64 | 1609 // s.f64 = spill b.f64 | 
| 1611 // t_lo.i32 = lo(s.f64) | 1610 // t_lo.i32 = lo(s.f64) | 
| 1612 // a_lo.i32 = t_lo.i32 | 1611 // a_lo.i32 = t_lo.i32 | 
| 1613 // t_hi.i32 = hi(s.f64) | 1612 // t_hi.i32 = hi(s.f64) | 
| 1614 // a_hi.i32 = t_hi.i32 | 1613 // a_hi.i32 = t_hi.i32 | 
| 1615 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode()); | 1614 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode()); | 
| 1616 Spill->setWeight(RegWeight::Zero); | 1615 Spill->setWeight(RegWeight::Zero); | 
| 1617 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true); | 1616 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true); | 
| 1618 _mov(Spill, Src0RM); | 1617 _movq(Spill, Src0RM); | 
| 1619 | 1618 | 
| 1620 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1619 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
| 1621 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1620 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
| 1622 Variable *T_Lo = makeReg(IceType_i32); | 1621 Variable *T_Lo = makeReg(IceType_i32); | 
| 1623 Variable *T_Hi = makeReg(IceType_i32); | 1622 Variable *T_Hi = makeReg(IceType_i32); | 
| 1624 VariableSplit *SpillLo = | 1623 VariableSplit *SpillLo = | 
| 1625 VariableSplit::create(Func, Spill, VariableSplit::Low); | 1624 VariableSplit::create(Func, Spill, VariableSplit::Low); | 
| 1626 VariableSplit *SpillHi = | 1625 VariableSplit *SpillHi = | 
| 1627 VariableSplit::create(Func, Spill, VariableSplit::High); | 1626 VariableSplit::create(Func, Spill, VariableSplit::High); | 
| 1628 | 1627 | 
| (...skipping 22 matching lines...) | |
| 1651 VariableSplit *SpillHi = | 1650 VariableSplit *SpillHi = | 
| 1652 VariableSplit::create(Func, Spill, VariableSplit::High); | 1651 VariableSplit::create(Func, Spill, VariableSplit::High); | 
| 1653 _mov(T_Lo, loOperand(Src0)); | 1652 _mov(T_Lo, loOperand(Src0)); | 
| 1654 // Technically, the Spill is defined after the _store happens, but | 1653 // Technically, the Spill is defined after the _store happens, but | 
| 1655 // SpillLo is considered a "use" of Spill so define Spill before it | 1654 // SpillLo is considered a "use" of Spill so define Spill before it | 
| 1656 // is used. | 1655 // is used. | 
| 1657 Context.insert(InstFakeDef::create(Func, Spill)); | 1656 Context.insert(InstFakeDef::create(Func, Spill)); | 
| 1658 _store(T_Lo, SpillLo); | 1657 _store(T_Lo, SpillLo); | 
| 1659 _mov(T_Hi, hiOperand(Src0)); | 1658 _mov(T_Hi, hiOperand(Src0)); | 
| 1660 _store(T_Hi, SpillHi); | 1659 _store(T_Hi, SpillHi); | 
| 1661 _mov(Dest, Spill); | 1660 _movq(Dest, Spill); | 
| 1662 } break; | 1661 } break; | 
| 1663 } | 1662 } | 
| 1664 break; | 1663 break; | 
| 1665 } | 1664 } | 
| 1666 } | 1665 } | 
| 1667 } | 1666 } | 
| 1668 | 1667 | 
| 1669 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 1668 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 
| 1670 Operand *Src0 = Inst->getSrc(0); | 1669 Operand *Src0 = Inst->getSrc(0); | 
| 1671 Operand *Src1 = Inst->getSrc(1); | 1670 Operand *Src1 = Inst->getSrc(1); | 
| (...skipping 121 matching lines...) | |
| 1793 _mov(Dest, One); | 1792 _mov(Dest, One); | 
| 1794 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 1793 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 
| 1795 Context.insert(InstFakeUse::create(Func, Dest)); | 1794 Context.insert(InstFakeUse::create(Func, Dest)); | 
| 1796 _mov(Dest, Zero); | 1795 _mov(Dest, Zero); | 
| 1797 Context.insert(Label); | 1796 Context.insert(Label); | 
| 1798 } | 1797 } | 
| 1799 | 1798 | 
| 1800 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 1799 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 
| 1801 switch (Instr->getIntrinsicInfo().ID) { | 1800 switch (Instr->getIntrinsicInfo().ID) { | 
| 1802 case Intrinsics::AtomicCmpxchg: | 1801 case Intrinsics::AtomicCmpxchg: | 
| 1802 if (!Intrinsics::VerifyMemoryOrder( | |
| 1803 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | |
| 1804 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | |
| 1805 return; | |
| 1806 } | |
| 1807 if (!Intrinsics::VerifyMemoryOrder( | |
| 1808 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | |
| 1809 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | |
| 1810 return; | |
| 1811 } | |
| 1812 // TODO(jvoung): fill it in. | |
| 1813 Func->setError("Unhandled intrinsic"); | |
| 1814 return; | |
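The calls to Intrinsics::VerifyMemoryOrder() above rely on a helper whose body is not in this file. A minimal sketch of what it plausibly checks — the enum values and the seq_cst-only rule are assumptions, based on PNaCl's stable ABI initially restricting atomics to sequentially-consistent ordering:

```cpp
#include <cstdint>

// Assumed numbering, following the C++11 memory-order list.
enum MemoryOrder {
  MemoryOrderInvalid = 0,
  MemoryOrderRelaxed,
  MemoryOrderConsume,
  MemoryOrderAcquire,
  MemoryOrderRelease,
  MemoryOrderAcquireRelease,
  MemoryOrderSequentiallyConsistent
};

bool verifyMemoryOrder(uint64_t Order) {
  // Anything weaker than seq_cst is rejected, surfacing as the
  // "Unexpected memory ordering" errors set by the callers above.
  return Order == MemoryOrderSequentiallyConsistent;
}
```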
| 1803 case Intrinsics::AtomicFence: | 1815 case Intrinsics::AtomicFence: | 
| 1816 if (!Intrinsics::VerifyMemoryOrder( | |
| 1817 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | |
| 1818 Func->setError("Unexpected memory ordering for AtomicFence"); | |
| 1819 return; | |
| 1820 } | |
| 1821 _mfence(); | |
| 1822 return; | |
| 1804 case Intrinsics::AtomicFenceAll: | 1823 case Intrinsics::AtomicFenceAll: | 
| 1805 case Intrinsics::AtomicIsLockFree: | 1824 // NOTE: FenceAll should prevent any load/store from being moved | 
| 1806 case Intrinsics::AtomicLoad: | 1825 // across the fence (both atomic and non-atomic). The InstX8632Mfence | 
| 1826 // instruction is currently marked coarsely as "HasSideEffects". | |
| 1827 _mfence(); | |
| 1828 return; | |
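For context, a minimal sketch of the source-level operation both fence cases serve: a sequentially-consistent fence maps to the single mfence emitted above.

```cpp
#include <atomic>

void fence_example() {
  // AtomicFence (seq_cst) and AtomicFenceAll both lower to one mfence.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}
```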
| 1829 case Intrinsics::AtomicIsLockFree: { | |
| 1830 // X86 is always lock free for 8/16/32/64 bit accesses. | |
| 1831 // TODO(jvoung): Since the result is constant when given a constant | |
| 1832 // byte size, this opens up DCE opportunities. | |
| 1833 Operand *ByteSize = Instr->getArg(0); | |
| 1834 Variable *Dest = Instr->getDest(); | |
| 1835 if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) { | |
| 1836 Constant *Result; | |
| 1837 switch (CI->getValue()) { | |
| 1838 default: | |
| 1839 // For now, error out if the size is greater than we expect; | |
| 1840 // if we ever start allowing atomic operations on larger types, | |
| 1841 // this will flag the code that needs to change. | |
| 1842 // Some models of x86 (almost all the ones that support 64-bit) | |
| 1843 // have cmpxchg16b, which can make 16-byte operations lock free. | |
| 
JF (2014/06/25 01:44:03): I'd clarify that it needs a LOCK prefix.

jvoung (off chromium) (2014/06/25 15:32:44): Actually, I don't think this is usable for x86-32

JF (2014/06/25 15:41:25): Oh right. It also looks like we'll need to conditi
 | |
| 1844 if (CI->getValue() > 8) { | |
| 1845 Func->setError("Unexpected AtomicIsLockFree byte size (> 8 bytes)"); | |
| 1846 return; | |
| 1847 } | |
| 1848 Result = Ctx->getConstantZero(IceType_i32); | |
| 1849 break; | |
| 1850 case 1: | |
| 1851 case 2: | |
| 1852 case 4: | |
| 1853 case 8: | |
| 1854 Result = Ctx->getConstantInt(IceType_i32, 1); | |
| 1855 break; | |
| 1856 } | |
| 1857 _mov(Dest, Result); | |
| 1858 return; | |
| 1859 } | |
| 1860 // The PNaCl ABI requires the byte size to be a compile-time constant. | |
| 1861 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | |
| 1862 return; | |
| 1863 } | |
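The switch above folds the lock-free answer at compile time. The same rule as a standalone sketch (isLockFreeByteSize is a hypothetical name, not part of the CL):

```cpp
#include <cstdint>

bool isLockFreeByteSize(uint64_t Bytes) {
  switch (Bytes) {
  case 1: case 2: case 4: case 8:
    return true;  // x86 accesses at these widths are always lock-free.
  default:
    return false; // 16 bytes would need cmpxchg16b, absent on x86-32.
  }
}
```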
| 1864 case Intrinsics::AtomicLoad: { | |
| 1865 // We require the memory address to be naturally aligned. | |
| 1866 // Given that, normal loads are already atomic. | |
| 1867 if (!Intrinsics::VerifyMemoryOrder( | |
| 1868 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) { | |
| 1869 Func->setError("Unexpected memory ordering for AtomicLoad"); | |
| 1870 return; | |
| 1871 } | |
| 1872 Variable *Dest = Instr->getDest(); | |
| 1873 if (Dest->getType() == IceType_i64) { | |
| 1874 // Follow what GCC does and use a movq instead of what lowerLoad() | |
| 1875 // normally does (split the load into two). | |
| 1876 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | |
| 1877 // can't happen anyway, since this is x86-32 and integer arithmetic only | |
| 1878 // happens on 32-bit quantities. | |
| 1879 Variable *T = makeReg(IceType_f64); | |
| 1880 OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64); | |
| 1881 _movq(T, Addr); | |
| 1882 // Then cast the bits back out of the XMM register to the i64 Dest. | |
| 1883 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | |
| 1884 lowerCast(Cast); | |
| 1885 // Make sure that the atomic load isn't elided. | |
| 1886 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | |
| 1887 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | |
| 1888 return; | |
| 1889 } | |
| 1890 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | |
| 1891 lowerLoad(Load); | |
| 1892 // Make sure the atomic load isn't elided. | |
| 1893 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 1894 return; | |
| 1895 } | |
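A hedged illustration of why the i64 path uses movq: on x86-32 with SSE2, a seq_cst 64-bit load is typically one 8-byte SSE move rather than two 32-bit loads, which would not be atomic as a pair. The assembly in the comment is the usual GCC/Clang shape, not output from this CL:

```cpp
#include <atomic>
#include <cstdint>

uint64_t load64(const std::atomic<uint64_t> &A) {
  // Typical x86-32 shape:
  //   movq (%eax), %xmm0   ; one atomic 8-byte read
  //   movq %xmm0, (%esp)   ; spill, then split into %eax/%edx
  return A.load(std::memory_order_seq_cst);
}
```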
| 1807 case Intrinsics::AtomicRMW: | 1896 case Intrinsics::AtomicRMW: | 
| 1808 case Intrinsics::AtomicStore: | 1897 if (!Intrinsics::VerifyMemoryOrder( | 
| 1898 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | |
| 1899 Func->setError("Unexpected memory ordering for AtomicRMW"); | |
| 1900 return; | |
| 1901 } | |
| 1902 lowerAtomicRMW(Instr->getDest(), | |
| 1903 static_cast<uint32_t>(llvm::cast<ConstantInteger>( | |
| 1904 Instr->getArg(0))->getValue()), | |
| 1905 Instr->getArg(1), Instr->getArg(2)); | |
| 1906 return; | |
| 1907 case Intrinsics::AtomicStore: { | |
| 1908 if (!Intrinsics::VerifyMemoryOrder( | |
| 1909 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) { | |
| 1910 Func->setError("Unexpected memory ordering for AtomicStore"); | |
| 1911 return; | |
| 1912 } | |
| 1913 // We require the memory address to be naturally aligned. | |
| 1914 // Given that, normal stores are already atomic. | |
| 1915 // Add a fence after the store to make it visible. | |
| 1916 Operand *Value = Instr->getArg(0); | |
| 1917 Operand *Ptr = Instr->getArg(1); | |
| 1918 if (Value->getType() == IceType_i64) { | |
| 1919 // Use a movq instead of what lowerStore() normally does | |
| 1920 // (split the store into two), following what GCC does. | |
| 1921 // Cast the bits from the i64 into an XMM register first. | |
| 1922 Variable *T = makeReg(IceType_f64); | |
| 1923 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | |
| 1924 lowerCast(Cast); | |
| 1925 // Then store XMM w/ a movq. | |
| 1926 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64); | |
| 1927 _storeq(T, Addr); | |
| 1928 _mfence(); | |
| 1929 return; | |
| 1930 } | |
| 1931 InstStore *Store = InstStore::create(Func, Value, Ptr); | |
| 1932 lowerStore(Store); | |
| 1933 _mfence(); | |
| 1934 return; | |
| 1935 } | |
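The store path is the mirror image, under the same assumptions: one movq store followed by the mfence that makes the seq_cst store visible, matching the _storeq()/_mfence() pair above.

```cpp
#include <atomic>
#include <cstdint>

void store64(std::atomic<uint64_t> &A, uint64_t V) {
  // Typical x86-32 shape: movq %xmm0, (mem) ; mfence
  A.store(V, std::memory_order_seq_cst);
}
```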
| 1809 case Intrinsics::Bswap: | 1936 case Intrinsics::Bswap: | 
| 1810 case Intrinsics::Ctlz: | 1937 case Intrinsics::Ctlz: | 
| 1811 case Intrinsics::Ctpop: | 1938 case Intrinsics::Ctpop: | 
| 1812 case Intrinsics::Cttz: | 1939 case Intrinsics::Cttz: | 
| 1940 // TODO(jvoung): fill it in. | |
| 1813 Func->setError("Unhandled intrinsic"); | 1941 Func->setError("Unhandled intrinsic"); | 
| 1814 return; | 1942 return; | 
| 1815 case Intrinsics::Longjmp: { | 1943 case Intrinsics::Longjmp: { | 
| 1816 InstCall *Call = makeHelperCall("longjmp", NULL, 2); | 1944 InstCall *Call = makeHelperCall("longjmp", NULL, 2); | 
| 1817 Call->addArg(Instr->getArg(0)); | 1945 Call->addArg(Instr->getArg(0)); | 
| 1818 Call->addArg(Instr->getArg(1)); | 1946 Call->addArg(Instr->getArg(1)); | 
| 1819 lowerCall(Call); | 1947 lowerCall(Call); | 
| 1820 break; | 1948 return; | 
| 1821 } | 1949 } | 
| 1822 case Intrinsics::Memcpy: { | 1950 case Intrinsics::Memcpy: { | 
| 1823 // In the future, we could potentially emit an inline memcpy/memset, etc. | 1951 // In the future, we could potentially emit an inline memcpy/memset, etc. | 
| 1824 // for intrinsic calls w/ a known length. | 1952 // for intrinsic calls w/ a known length. | 
| 1825 InstCall *Call = makeHelperCall("memcpy", NULL, 3); | 1953 InstCall *Call = makeHelperCall("memcpy", NULL, 3); | 
| 1826 Call->addArg(Instr->getArg(0)); | 1954 Call->addArg(Instr->getArg(0)); | 
| 1827 Call->addArg(Instr->getArg(1)); | 1955 Call->addArg(Instr->getArg(1)); | 
| 1828 Call->addArg(Instr->getArg(2)); | 1956 Call->addArg(Instr->getArg(2)); | 
| 1829 lowerCall(Call); | 1957 lowerCall(Call); | 
| 1830 break; | 1958 return; | 
| 1831 } | 1959 } | 
| 1832 case Intrinsics::Memmove: { | 1960 case Intrinsics::Memmove: { | 
| 1833 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 1961 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 
| 1834 Call->addArg(Instr->getArg(0)); | 1962 Call->addArg(Instr->getArg(0)); | 
| 1835 Call->addArg(Instr->getArg(1)); | 1963 Call->addArg(Instr->getArg(1)); | 
| 1836 Call->addArg(Instr->getArg(2)); | 1964 Call->addArg(Instr->getArg(2)); | 
| 1837 lowerCall(Call); | 1965 lowerCall(Call); | 
| 1838 break; | 1966 return; | 
| 1839 } | 1967 } | 
| 1840 case Intrinsics::Memset: { | 1968 case Intrinsics::Memset: { | 
| 1841 // The value operand needs to be extended to a stack slot size | 1969 // The value operand needs to be extended to a stack slot size | 
| 1842 // because we "push" only works for a specific operand size. | 1970 // because we "push" only works for a specific operand size. | 
| 1843 Operand *ValOp = Instr->getArg(1); | 1971 Operand *ValOp = Instr->getArg(1); | 
| 1844 assert(ValOp->getType() == IceType_i8); | 1972 assert(ValOp->getType() == IceType_i8); | 
| 1845 Variable *ValExt = makeReg(stackSlotType()); | 1973 Variable *ValExt = makeReg(stackSlotType()); | 
| 1846 _movzx(ValExt, ValOp); | 1974 _movzx(ValExt, ValOp); | 
| 1847 InstCall *Call = makeHelperCall("memset", NULL, 3); | 1975 InstCall *Call = makeHelperCall("memset", NULL, 3); | 
| 1848 Call->addArg(Instr->getArg(0)); | 1976 Call->addArg(Instr->getArg(0)); | 
| 1849 Call->addArg(ValExt); | 1977 Call->addArg(ValExt); | 
| 1850 Call->addArg(Instr->getArg(2)); | 1978 Call->addArg(Instr->getArg(2)); | 
| 1851 lowerCall(Call); | 1979 lowerCall(Call); | 
| 1852 break; | 1980 return; | 
| 1853 } | 1981 } | 
| 1854 case Intrinsics::NaClReadTP: { | 1982 case Intrinsics::NaClReadTP: { | 
| 1855 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | 1983 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 
| 1856 Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL, | 1984 Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL, | 
| 1857 0, OperandX8632Mem::SegReg_GS); | 1985 0, OperandX8632Mem::SegReg_GS); | 
| 1858 Variable *Dest = Instr->getDest(); | 1986 Variable *Dest = Instr->getDest(); | 
| 1859 Variable *T = NULL; | 1987 Variable *T = NULL; | 
| 1860 _mov(T, Src); | 1988 _mov(T, Src); | 
| 1861 _mov(Dest, T); | 1989 _mov(Dest, T); | 
| 1862 break; | 1990 return; | 
| 1863 } | 1991 } | 
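The NaClReadTP lowering is a 32-bit load from offset 0 of the GS segment, where NaCl keeps the thread pointer. A sketch of the equivalent access using GCC-style inline assembly (illustration only; real code reaches this through the intrinsic):

```cpp
#include <cstdint>

static inline uint32_t nacl_read_tp() {
  uint32_t Tp;
  // mov Tp, gs:[0] -- matches the OperandX8632Mem with SegReg_GS above.
  asm volatile("movl %%gs:0, %0" : "=r"(Tp));
  return Tp;
}
```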
| 1864 case Intrinsics::Setjmp: { | 1992 case Intrinsics::Setjmp: { | 
| 1865 InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1); | 1993 InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1); | 
| 1866 Call->addArg(Instr->getArg(0)); | 1994 Call->addArg(Instr->getArg(0)); | 
| 1867 lowerCall(Call); | 1995 lowerCall(Call); | 
| 1868 break; | 1996 return; | 
| 1869 } | 1997 } | 
| 1870 case Intrinsics::Sqrt: | 1998 case Intrinsics::Sqrt: | 
| 1871 case Intrinsics::Stacksave: | 1999 case Intrinsics::Stacksave: | 
| 1872 case Intrinsics::Stackrestore: | 2000 case Intrinsics::Stackrestore: | 
| 2001 // TODO(jvoung): fill it in. | |
| 1873 Func->setError("Unhandled intrinsic"); | 2002 Func->setError("Unhandled intrinsic"); | 
| 1874 return; | 2003 return; | 
| 1875 case Intrinsics::Trap: | 2004 case Intrinsics::Trap: | 
| 1876 _ud2(); | 2005 _ud2(); | 
| 1877 break; | 2006 return; | 
| 1878 case Intrinsics::UnknownIntrinsic: | 2007 case Intrinsics::UnknownIntrinsic: | 
| 1879 Func->setError("Should not be lowering UnknownIntrinsic"); | 2008 Func->setError("Should not be lowering UnknownIntrinsic"); | 
| 1880 return; | 2009 return; | 
| 1881 } | 2010 } | 
| 1882 return; | 2011 return; | 
| 1883 } | 2012 } | 
| 1884 | 2013 | 
| 2014 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | |
| 2015 Operand *Ptr, Operand *Val) { | |
| 2016 switch (Operation) { | |
| 2017 default: | |
| 2018 Func->setError("Unknown AtomicRMW operation"); | |
| 2019 return; | |
| 2020 case Intrinsics::AtomicAdd: { | |
| 2021 if (Dest->getType() == IceType_i64) { | |
| 2022 // Do a nasty cmpxchg8b loop. Factor this into a function. | |
| 2023 // TODO(jvoung): fill it in. | |
| 2024 Func->setError("Unhandled AtomicRMW operation"); | |
| 2025 return; | |
| 2026 } | |
| 2027 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); | |
| 2028 const bool Locked = true; | |
| 2029 Variable *T = NULL; | |
| 2030 _mov(T, Val); | |
| 2031 _xadd(Addr, T, Locked); | |
| 2032 _mov(Dest, T); | |
| 2033 return; | |
| 2034 } | |
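What the lock-xadd sequence computes, sketched with C++11 atomics: xadd writes old+val back to memory and leaves the old value in the source register, so Dest receives the value from before the addition.

```cpp
#include <atomic>
#include <cstdint>

uint32_t fetch_add32(std::atomic<uint32_t> &A, uint32_t V) {
  // x86 shape: mov V, %reg ; lock xadd %reg, (mem) ; old value in %reg.
  return A.fetch_add(V, std::memory_order_seq_cst);
}
```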
| 2035 case Intrinsics::AtomicSub: { | |
| 2036 if (Dest->getType() == IceType_i64) { | |
| 2037 // Do a nasty cmpxchg8b loop. | |
| 2038 // TODO(jvoung): fill it in. | |
| 2039 Func->setError("Unhandled AtomicRMW operation"); | |
| 2040 return; | |
| 2041 } | |
| 2042 // Generate a memory operand from Ptr. | |
| 2043 // neg... | |
| 2044 // Then do the same as AtomicAdd. | |
| 2045 // TODO(jvoung): fill it in. | |
| 2046 Func->setError("Unhandled AtomicRMW operation"); | |
| 2047 return; | |
| 2048 } | |
| 2049 case Intrinsics::AtomicOr: | |
| 2050 case Intrinsics::AtomicAnd: | |
| 2051 case Intrinsics::AtomicXor: | |
| 2052 case Intrinsics::AtomicExchange: | |
| 2053 // TODO(jvoung): fill it in. | |
| 2054 Func->setError("Unhandled AtomicRMW operation"); | |
| 2055 return; | |
| 2056 } | |
| 2057 } | |
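The "cmpxchg8b loop" the TODOs defer is, in C++11 terms, a compare-exchange retry loop; on x86-32 the compare_exchange below becomes lock cmpxchg8b. A sketch:

```cpp
#include <atomic>
#include <cstdint>

uint64_t fetch_add64(std::atomic<uint64_t> &A, uint64_t V) {
  uint64_t Old = A.load(std::memory_order_relaxed);
  // compare_exchange_weak refreshes Old on failure, so each retry
  // recomputes Old + V against the latest value in memory.
  while (!A.compare_exchange_weak(Old, Old + V,
                                  std::memory_order_seq_cst,
                                  std::memory_order_relaxed)) {
  }
  return Old; // value observed before the add, per fetch_add semantics
}
```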
| 2058 | |
| 1885 namespace { | 2059 namespace { | 
| 1886 | 2060 | 
| 1887 bool isAdd(const Inst *Inst) { | 2061 bool isAdd(const Inst *Inst) { | 
| 1888 if (const InstArithmetic *Arith = | 2062 if (const InstArithmetic *Arith = | 
| 1889 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 2063 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 
| 1890 return (Arith->getOp() == InstArithmetic::Add); | 2064 return (Arith->getOp() == InstArithmetic::Add); | 
| 1891 } | 2065 } | 
| 1892 return false; | 2066 return false; | 
| 1893 } | 2067 } | 
| 1894 | 2068 | 
| (...skipping 116 matching lines...) | |
| 2011 | 2185 | 
| 2012 } // anonymous namespace | 2186 } // anonymous namespace | 
| 2013 | 2187 | 
| 2014 void TargetX8632::lowerLoad(const InstLoad *Inst) { | 2188 void TargetX8632::lowerLoad(const InstLoad *Inst) { | 
| 2015 // A Load instruction can be treated the same as an Assign | 2189 // A Load instruction can be treated the same as an Assign | 
| 2016 // instruction, after the source operand is transformed into an | 2190 // instruction, after the source operand is transformed into an | 
| 2017 // OperandX8632Mem operand. Note that the address mode | 2191 // OperandX8632Mem operand. Note that the address mode | 
| 2018 // optimization already creates an OperandX8632Mem operand, so it | 2192 // optimization already creates an OperandX8632Mem operand, so it | 
| 2019 // doesn't need another level of transformation. | 2193 // doesn't need another level of transformation. | 
| 2020 Type Ty = Inst->getDest()->getType(); | 2194 Type Ty = Inst->getDest()->getType(); | 
| 2021 Operand *Src0 = Inst->getSourceAddress(); | 2195 Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty); | 
| 2022 // Address mode optimization already creates an OperandX8632Mem | |
| 2023 // operand, so it doesn't need another level of transformation. | |
| 2024 if (!llvm::isa<OperandX8632Mem>(Src0)) { | |
| 2025 Variable *Base = llvm::dyn_cast<Variable>(Src0); | |
| 2026 Constant *Offset = llvm::dyn_cast<Constant>(Src0); | |
| 2027 assert(Base || Offset); | |
| 2028 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset); | |
| 2029 } | |
| 2030 | 2196 | 
| 2031 // Fuse this load with a subsequent Arithmetic instruction in the | 2197 // Fuse this load with a subsequent Arithmetic instruction in the | 
| 2032 // following situations: | 2198 // following situations: | 
| 2033 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b | 2199 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b | 
| 2034 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true | 2200 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true | 
| 2035 // | 2201 // | 
| 2036 // TODO: Clean up and test thoroughly. | 2202 // TODO: Clean up and test thoroughly. | 
| 2203 // (E.g., if there is an mfence-all make sure the load ends up on the | |
| 2204 // same side of the fence). | |
| 2037 // | 2205 // | 
| 2038 // TODO: Why limit to Arithmetic instructions? This could probably be | 2206 // TODO: Why limit to Arithmetic instructions? This could probably be | 
| 2039 // applied to most any instruction type. Look at all source operands | 2207 // applied to most any instruction type. Look at all source operands | 
| 2040 // in the following instruction, and if there is one instance of the | 2208 // in the following instruction, and if there is one instance of the | 
| 2041 // load instruction's dest variable, and that instruction ends that | 2209 // load instruction's dest variable, and that instruction ends that | 
| 2042 // variable's live range, then make the substitution. Deal with | 2210 // variable's live range, then make the substitution. Deal with | 
| 2043 // commutativity optimization in the arithmetic instruction lowering. | 2211 // commutativity optimization in the arithmetic instruction lowering. | 
| 2044 InstArithmetic *NewArith = NULL; | 2212 InstArithmetic *NewArith = NULL; | 
| 2045 if (InstArithmetic *Arith = | 2213 if (InstArithmetic *Arith = | 
| 2046 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) { | 2214 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) { | 
| (...skipping 110 matching lines...) | |
| 2157 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); | 2325 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); | 
| 2158 _mov(Dest, SrcF); | 2326 _mov(Dest, SrcF); | 
| 2159 } | 2327 } | 
| 2160 | 2328 | 
| 2161 Context.insert(Label); | 2329 Context.insert(Label); | 
| 2162 } | 2330 } | 
| 2163 | 2331 | 
| 2164 void TargetX8632::lowerStore(const InstStore *Inst) { | 2332 void TargetX8632::lowerStore(const InstStore *Inst) { | 
| 2165 Operand *Value = Inst->getData(); | 2333 Operand *Value = Inst->getData(); | 
| 2166 Operand *Addr = Inst->getAddr(); | 2334 Operand *Addr = Inst->getAddr(); | 
| 2167 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr); | 2335 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 
| 2168 // Address mode optimization already creates an OperandX8632Mem | |
| 2169 // operand, so it doesn't need another level of transformation. | |
| 2170 if (!NewAddr) { | |
| 2171 // The address will be either a constant (which represents a global | |
| 2172 // variable) or a variable, so either the Base or Offset component | |
| 2173 // of the OperandX8632Mem will be set. | |
| 2174 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
| 2175 Constant *Offset = llvm::dyn_cast<Constant>(Addr); | |
| 2176 assert(Base || Offset); | |
| 2177 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset); | |
| 2178 } | |
| 2179 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr)); | |
| 2180 | 2336 | 
| 2181 if (NewAddr->getType() == IceType_i64) { | 2337 if (NewAddr->getType() == IceType_i64) { | 
| 2182 Value = legalize(Value); | 2338 Value = legalize(Value); | 
| 2183 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 2339 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 
| 2184 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 2340 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 
| 2185 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 2341 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 
| 2186 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 2342 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 
| 2187 } else { | 2343 } else { | 
| 2188 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 2344 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 
| 2189 _store(Value, NewAddr); | 2345 _store(Value, NewAddr); | 
| (...skipping 97 matching lines...) | |
| 2287 // | 2443 // | 
| 2288 // If in the future the implementation is changed to lower undef | 2444 // If in the future the implementation is changed to lower undef | 
| 2289 // values to uninitialized registers, a FakeDef will be needed: | 2445 // values to uninitialized registers, a FakeDef will be needed: | 
| 2290 // Context.insert(InstFakeDef::create(Func, Reg)); | 2446 // Context.insert(InstFakeDef::create(Func, Reg)); | 
| 2291 // This is in order to ensure that the live range of Reg is not | 2447 // This is in order to ensure that the live range of Reg is not | 
| 2292 // overestimated. If the constant being lowered is a 64 bit value, | 2448 // overestimated. If the constant being lowered is a 64 bit value, | 
| 2293 // then the result should be split and the lo and hi components will | 2449 // then the result should be split and the lo and hi components will | 
| 2294 // need to go in uninitialized registers. | 2450 // need to go in uninitialized registers. | 
| 2295 From = Ctx->getConstantZero(From->getType()); | 2451 From = Ctx->getConstantZero(From->getType()); | 
| 2296 } | 2452 } | 
| 2297 bool NeedsReg = !(Allowed & Legal_Imm) || | 2453 bool NeedsReg = | 
| 2454 !(Allowed & Legal_Imm) || | |
| 2298 // ConstantFloat and ConstantDouble are actually memory operands. | 2455 // ConstantFloat and ConstantDouble are actually memory operands. | 
| 2299 (!(Allowed & Legal_Mem) && (From->getType() == IceType_f32 || | 2456 (!(Allowed & Legal_Mem) && | 
| 2300 From->getType() == IceType_f64)); | 2457 (From->getType() == IceType_f32 || From->getType() == IceType_f64)); | 
| 2301 if (NeedsReg) { | 2458 if (NeedsReg) { | 
| 2302 Variable *Reg = makeReg(From->getType(), RegNum); | 2459 Variable *Reg = makeReg(From->getType(), RegNum); | 
| 2303 _mov(Reg, From); | 2460 _mov(Reg, From); | 
| 2304 From = Reg; | 2461 From = Reg; | 
| 2305 } | 2462 } | 
| 2306 return From; | 2463 return From; | 
| 2307 } | 2464 } | 
| 2308 if (Variable *Var = llvm::dyn_cast<Variable>(From)) { | 2465 if (Variable *Var = llvm::dyn_cast<Variable>(From)) { | 
| 2309 // We need a new physical register for the operand if: | 2466 // We need a new physical register for the operand if: | 
| 2310 // Mem is not allowed and Var->getRegNum() is unknown, or | 2467 // Mem is not allowed and Var->getRegNum() is unknown, or | 
| (...skipping 12 matching lines...) | |
| 2323 llvm_unreachable("Unhandled operand kind in legalize()"); | 2480 llvm_unreachable("Unhandled operand kind in legalize()"); | 
| 2324 return From; | 2481 return From; | 
| 2325 } | 2482 } | 
| 2326 | 2483 | 
| 2327 // Provide a trivial wrapper to legalize() for this common usage. | 2484 // Provide a trivial wrapper to legalize() for this common usage. | 
| 2328 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap, | 2485 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap, | 
| 2329 int32_t RegNum) { | 2486 int32_t RegNum) { | 
| 2330 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum)); | 2487 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum)); | 
| 2331 } | 2488 } | 
| 2332 | 2489 | 
| 2490 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) { | |
| 2491 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); | |
| 2492 // Address mode optimization may already have created an | |
| 2493 // OperandX8632Mem, in which case no further transformation is | |
| 2494 // needed. | |
| 2495 if (!Mem) { | |
| 2496 Variable *Base = llvm::dyn_cast<Variable>(Operand); | |
| 2497 Constant *Offset = llvm::dyn_cast<Constant>(Operand); | |
| 2498 assert(Base || Offset); | |
| 2499 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | |
| 2500 } | |
| 2501 return llvm::cast<OperandX8632Mem>(legalize(Mem)); | |
| 2502 } | |
| 2503 | |
| 2333 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 2504 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 
| 2334 // There aren't any 64-bit integer registers for x86-32. | 2505 // There aren't any 64-bit integer registers for x86-32. | 
| 2335 assert(Type != IceType_i64); | 2506 assert(Type != IceType_i64); | 
| 2336 Variable *Reg = Func->makeVariable(Type, Context.getNode()); | 2507 Variable *Reg = Func->makeVariable(Type, Context.getNode()); | 
| 2337 if (RegNum == Variable::NoRegister) | 2508 if (RegNum == Variable::NoRegister) | 
| 2338 Reg->setWeightInfinite(); | 2509 Reg->setWeightInfinite(); | 
| 2339 else | 2510 else | 
| 2340 Reg->setRegNum(RegNum); | 2511 Reg->setRegNum(RegNum); | 
| 2341 return Reg; | 2512 return Reg; | 
| 2342 } | 2513 } | 
| (...skipping 66 matching lines...) | |
| 2409 // llvm-mc doesn't parse "dword ptr [.L$foo]". | 2580 // llvm-mc doesn't parse "dword ptr [.L$foo]". | 
| 2410 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; | 2581 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; | 
| 2411 } | 2582 } | 
| 2412 | 2583 | 
| 2413 template <> void ConstantDouble::emit(GlobalContext *Ctx) const { | 2584 template <> void ConstantDouble::emit(GlobalContext *Ctx) const { | 
| 2414 Ostream &Str = Ctx->getStrEmit(); | 2585 Ostream &Str = Ctx->getStrEmit(); | 
| 2415 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]"; | 2586 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]"; | 
| 2416 } | 2587 } | 
| 2417 | 2588 | 
| 2418 } // end of namespace Ice | 2589 } // end of namespace Ice | 