OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 1782 matching lines...) | |
1793 _cmp(Src0New, Src1); | 1793 _cmp(Src0New, Src1); |
1794 _mov(Dest, One); | 1794 _mov(Dest, One); |
1795 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 1795 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
1796 Context.insert(InstFakeUse::create(Func, Dest)); | 1796 Context.insert(InstFakeUse::create(Func, Dest)); |
1797 _mov(Dest, Zero); | 1797 _mov(Dest, Zero); |
1798 Context.insert(Label); | 1798 Context.insert(Label); |
1799 } | 1799 } |
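
For context, this sequence materializes a boolean comparison result with a branch. A sketch of the assembly it should produce, using placeholder operand names that are not taken from this patch, in the same listing style as the comments later in this patch:

    // cmp src0, src1
    // mov dest, 1
    // j<cc> .LABEL         (cc chosen by getIcmp32Mapping)
    // mov dest, 0
    // .LABEL:

The InstFakeUse between the two movs models a use of Dest, so liveness analysis does not treat the first mov as a dead store.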
1800 | 1800 |
1801 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 1801 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
1802 switch (Instr->getIntrinsicInfo().ID) { | 1802 switch (Instr->getIntrinsicInfo().ID) { |
1803 case Intrinsics::AtomicCmpxchg: | 1803 case Intrinsics::AtomicCmpxchg: { |
1804 if (!Intrinsics::VerifyMemoryOrder( | 1804 if (!Intrinsics::VerifyMemoryOrder( |
1805 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | 1805 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { |
1806 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 1806 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
1807 return; | 1807 return; |
1808 } | 1808 } |
1809 if (!Intrinsics::VerifyMemoryOrder( | 1809 if (!Intrinsics::VerifyMemoryOrder( |
1810 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 1810 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
1811 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 1811 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
1812 return; | 1812 return; |
1813 } | 1813 } |
1814 // TODO(jvoung): fill it in. | 1814 Variable *DestPrev = Instr->getDest(); |
1815 Func->setError("Unhandled intrinsic"); | 1815 Operand *PtrToMem = Instr->getArg(0); |
1816 Operand *Expected = Instr->getArg(1); | |
1817 Operand *Desired = Instr->getArg(2); | |
1818 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | |
1819 // TODO(jvoung): If we peek ahead a few instructions and see how | |
1820 // DestPrev is used (typically via another compare and branch), | |
1821 // we may be able to optimize. If the result truly is used by a | |
1822 // compare + branch, and the comparison is for equality, then we can | |
1823 // optimized out the later compare, and fuse with the later branch. | |
Jim Stichnoth 2014/07/08 04:50:19: optimized --> optimize
jvoung (off chromium) 2014/07/09 17:07:55: Done.
1816 return; | 1824 return; |
1825 } | |
1817 case Intrinsics::AtomicFence: | 1826 case Intrinsics::AtomicFence: |
1818 if (!Intrinsics::VerifyMemoryOrder( | 1827 if (!Intrinsics::VerifyMemoryOrder( |
1819 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 1828 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
1820 Func->setError("Unexpected memory ordering for AtomicFence"); | 1829 Func->setError("Unexpected memory ordering for AtomicFence"); |
1821 return; | 1830 return; |
1822 } | 1831 } |
1823 _mfence(); | 1832 _mfence(); |
1824 return; | 1833 return; |
1825 case Intrinsics::AtomicFenceAll: | 1834 case Intrinsics::AtomicFenceAll: |
1826 // NOTE: FenceAll should prevent any load/store from being moved | 1835 // NOTE: FenceAll should prevent any load/store from being moved |
(...skipping 174 matching lines...) | |
2001 case Intrinsics::Trap: | 2010 case Intrinsics::Trap: |
2002 _ud2(); | 2011 _ud2(); |
2003 return; | 2012 return; |
2004 case Intrinsics::UnknownIntrinsic: | 2013 case Intrinsics::UnknownIntrinsic: |
2005 Func->setError("Should not be lowering UnknownIntrinsic"); | 2014 Func->setError("Should not be lowering UnknownIntrinsic"); |
2006 return; | 2015 return; |
2007 } | 2016 } |
2008 return; | 2017 return; |
2009 } | 2018 } |
2010 | 2019 |
2020 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, | |
2021 Operand *Expected, Operand *Desired) { | |
2022 if (Expected->getType() == IceType_i64) { | |
2023 // Reserve the pre-colored registers first, before adding any more | |
2024 // infinite-weight variables from FormMemoryOperand's legalization. | |
2025 Variable *T_edx = makeReg(IceType_i32, Reg_edx); | |
2026 Variable *T_eax = makeReg(IceType_i32, Reg_eax); | |
2027 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); | |
2028 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); | |
2029 _mov(T_eax, loOperand(Expected)); | |
2030 _mov(T_edx, hiOperand(Expected)); | |
2031 _mov(T_ebx, loOperand(Desired)); | |
2032 _mov(T_ecx, hiOperand(Desired)); | |
2033 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | |
2034 const bool Locked = true; | |
2035 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | |
2036 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | |
2037 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | |
2038 _mov(DestLo, T_eax); | |
2039 _mov(DestHi, T_edx); | |
2040 return; | |
2041 } | |
2042 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); | |
2043 _mov(T_eax, Expected); | |
2044 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | |
2045 Variable *DesiredReg = legalizeToVar(Desired); | |
2046 const bool Locked = true; | |
2047 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | |
2048 _mov(DestPrev, T_eax); | |
2049 } | |
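
For reference, a sketch of the sequences lowerAtomicCmpxchg aims to emit, in the same comment-listing style as the cmpxchg loop sketch later in this patch (operand placeholders are illustrative, not from the patch):

    // 64-bit:
    //   mov eax, <expected_lo>
    //   mov edx, <expected_hi>
    //   mov ebx, <desired_lo>
    //   mov ecx, <desired_hi>
    //   lock cmpxchg8b [ptr]    (on failure, edx:eax holds the current value)
    //   mov <dest_lo>, eax
    //   mov <dest_hi>, edx
    //
    // 8/16/32-bit:
    //   mov eax, <expected>
    //   lock cmpxchg [ptr], <desired>   (on failure, eax holds the current value)
    //   mov <dest>, eax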
2050 | |
2011 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 2051 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
2012 Operand *Ptr, Operand *Val) { | 2052 Operand *Ptr, Operand *Val) { |
2053 bool NeedsCmpxchg = false; | |
2054 LowerBinOp Op_Lo = NULL; | |
2055 LowerBinOp Op_Hi = NULL; | |
2013 switch (Operation) { | 2056 switch (Operation) { |
2014 default: | 2057 default: |
2015 Func->setError("Unknown AtomicRMW operation"); | 2058 Func->setError("Unknown AtomicRMW operation"); |
2016 return; | 2059 return; |
2017 case Intrinsics::AtomicAdd: { | 2060 case Intrinsics::AtomicAdd: { |
2018 if (Dest->getType() == IceType_i64) { | 2061 if (Dest->getType() == IceType_i64) { |
2019 // Do a nasty cmpxchg8b loop. Factor this into a function. | 2062 // All the fall-through paths must set this to true, but use this |
2020 // TODO(jvoung): fill it in. | 2063 // for asserting. |
2021 Func->setError("Unhandled AtomicRMW operation"); | 2064 NeedsCmpxchg = true; |
2022 return; | 2065 Op_Lo = &TargetX8632::_add; |
2066 Op_Hi = &TargetX8632::_adc; | |
2067 break; | |
2023 } | 2068 } |
2024 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); | 2069 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2025 const bool Locked = true; | 2070 const bool Locked = true; |
2026 Variable *T = NULL; | 2071 Variable *T = NULL; |
2027 _mov(T, Val); | 2072 _mov(T, Val); |
2028 _xadd(Addr, T, Locked); | 2073 _xadd(Addr, T, Locked); |
2029 _mov(Dest, T); | 2074 _mov(Dest, T); |
2030 return; | 2075 return; |
2031 } | 2076 } |
2032 case Intrinsics::AtomicSub: { | 2077 case Intrinsics::AtomicSub: { |
2033 if (Dest->getType() == IceType_i64) { | 2078 if (Dest->getType() == IceType_i64) { |
2034 // Do a nasty cmpxchg8b loop. | 2079 NeedsCmpxchg = true; |
2035 // TODO(jvoung): fill it in. | 2080 Op_Lo = &TargetX8632::_sub; |
2036 Func->setError("Unhandled AtomicRMW operation"); | 2081 Op_Hi = &TargetX8632::_sbb; |
2037 return; | 2082 break; |
2038 } | 2083 } |
2039 // Generate a memory operand from Ptr. | 2084 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2040 // neg... | 2085 const bool Locked = true; |
2041 // Then do the same as AtomicAdd. | 2086 Variable *T = NULL; |
2042 // TODO(jvoung): fill it in. | 2087 _mov(T, Val); |
2043 Func->setError("Unhandled AtomicRMW operation"); | 2088 _neg(T); |
2089 _xadd(Addr, T, Locked); | |
2090 _mov(Dest, T); | |
2044 return; | 2091 return; |
2045 } | 2092 } |
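
The AtomicSub fast path above works because subtracting Val equals adding its two's-complement negation, and lock xadd returns the pre-update value. A minimal C++11 sketch of the semantics being implemented, with hypothetical names, not Subzero code:

    #include <atomic>
    #include <cstdint>

    // Returns the value Mem held before the update, matching the contract
    // of the AtomicSub intrinsic lowered above (neg + lock xadd).
    std::uint32_t atomic_sub_old(std::atomic<std::uint32_t> &Mem,
                                 std::uint32_t Val) {
      // fetch_add of the negated value is fetch_sub; both return the old value.
      return Mem.fetch_add(0u - Val);
    }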
2046 case Intrinsics::AtomicOr: | 2093 case Intrinsics::AtomicOr: |
2094 // TODO(jvoung): If Dest is null or dead, then some of these | |
2095 // operations do not need an "exchange", but just a locked op. | |
2096 // That appears to be "worth" it for sub, or, and, and xor. | |
2097 // xadd is probably fine vs lock add for add, and xchg is fine | |
2098 // vs an atomic store. | |
2099 NeedsCmpxchg = true; | |
2100 Op_Lo = &TargetX8632::_or; | |
2101 Op_Hi = &TargetX8632::_or; | |
2102 break; | |
2047 case Intrinsics::AtomicAnd: | 2103 case Intrinsics::AtomicAnd: |
2104 NeedsCmpxchg = true; | |
2105 Op_Lo = &TargetX8632::_and; | |
2106 Op_Hi = &TargetX8632::_and; | |
2107 break; | |
2048 case Intrinsics::AtomicXor: | 2108 case Intrinsics::AtomicXor: |
2109 NeedsCmpxchg = true; | |
2110 Op_Lo = &TargetX8632::_xor; | |
2111 Op_Hi = &TargetX8632::_xor; | |
2112 break; | |
2049 case Intrinsics::AtomicExchange: | 2113 case Intrinsics::AtomicExchange: |
2050 // TODO(jvoung): fill it in. | 2114 if (Dest->getType() == IceType_i64) { |
2051 Func->setError("Unhandled AtomicRMW operation"); | 2115 NeedsCmpxchg = true; |
2116 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | |
2117 // just need to be moved to the ecx and ebx registers. | |
2118 Op_Lo = NULL; | |
2119 Op_Hi = NULL; | |
2120 break; | |
2121 } | |
2122 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); | |
2123 Variable *T = NULL; | |
2124 _mov(T, Val); | |
2125 _xchg(Addr, T); | |
2126 _mov(Dest, T); | |
2052 return; | 2127 return; |
2053 } | 2128 } |
2129 // Otherwise, we need a cmpxchg loop. | |
2130 assert(NeedsCmpxchg); | |
2131 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | |
2132 } | |
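
Op_Lo and Op_Hi are pointers to member functions (the LowerBinOp typedef), selected in the switch above and dispatched inside expandAtomicRMWAsCmpxchg via (this->*Op_Lo)(...). A standalone sketch of that dispatch pattern, with hypothetical class and member names:

    #include <cstdio>

    struct Lowerer {
      typedef void (Lowerer::*LowerBinOp)(int, int); // pointer-to-member type
      void _add(int a, int b) { std::printf("add %d, %d\n", a, b); }
      void _or(int a, int b) { std::printf("or %d, %d\n", a, b); }
      void expand(LowerBinOp Op) {
        if (Op)              // NULL means "just move the value" (xchg case)
          (this->*Op)(1, 2); // dispatch through the member pointer
      }
    };

    int main() {
      Lowerer L;
      L.expand(&Lowerer::_add); // prints "add 1, 2"
      L.expand(NULL);           // no-op, as in the AtomicExchange path
    }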
2133 | |
2134 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, | |
2135 Variable *Dest, Operand *Ptr, | |
2136 Operand *Val) { | |
2137 // Expand a more complex RMW operation as a cmpxchg loop: | |
2138 // For 64-bit: | |
2139 // mov eax, [ptr] | |
2140 // mov edx, [ptr + 4] | |
2141 // .LABEL: | |
2142 // mov ebx, eax | |
2143 // <Op_Lo> ebx, <desired_adj_lo> | |
2144 // mov ecx, edx | |
2145 // <Op_Hi> ecx, <desired_adj_hi> | |
2146 // lock cmpxchg8b [ptr] | |
2147 // jne .LABEL | |
2148 // mov <dest_lo>, eax | |
2149 // mov <dest_hi>, edx | |
2150 // | |
2151 // For 32-bit: | |
2152 // mov eax, [ptr] | |
2153 // .LABEL: | |
2154 // mov <reg>, eax | |
2155 // op <reg>, [desired_adj] | |
2156 // lock cmpxchg [ptr], <reg> | |
2157 // jne .LABEL | |
2158 // mov <dest>, eax | |
2159 // | |
2160 // If Op_{Lo,Hi} are NULL, then just copy the value. | |
2161 Val = legalize(Val); | |
2162 Type Ty = Val->getType(); | |
2163 if (Ty == IceType_i64) { | |
2164 Variable *T_edx = makeReg(IceType_i32, Reg_edx); | |
2165 Variable *T_eax = makeReg(IceType_i32, Reg_eax); | |
2166 // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax, | |
2167 // but that conflicts with the later requirement that we use eax for | |
2168 // cmpxchg. The two infinite weight choices conflict and we end up | |
2169 // choosing eax for both. Work around this for now, by forcing Addr | |
Jim Stichnoth 2014/07/08 04:50:19: I'd like to understand more about this. It sounds
jvoung (off chromium) 2014/07/08 18:14:07: The problem was actually with O2 register allocati
Jim Stichnoth 2014/07/09 18:14:28: OK, then this is almost certainly a regalloc bug w
jvoung (off chromium) 2014/07/10 23:14:49: Done.
2170 // to use a different register. Arbitrarily picking edi instead. | |
2171 bool AllowOverlap = false; | |
2172 Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_edi); | |
2173 OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty); | |
2174 _mov(T_eax, loOperand(Addr)); | |
2175 _mov(T_edx, hiOperand(Addr)); | |
2176 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); | |
2177 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); | |
2178 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2179 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; | |
2180 if (!IsXchg8b) { | |
2181 Context.insert(Label); | |
2182 _mov(T_ebx, T_eax); | |
2183 (this->*Op_Lo)(T_ebx, loOperand(Val)); | |
2184 _mov(T_ecx, T_edx); | |
2185 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | |
2186 } else { | |
2187 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | |
2188 // It just needs the Val loaded into ebx and ecx. | |
2189 // That can also be done before the loop. | |
2190 _mov(T_ebx, loOperand(Val)); | |
2191 _mov(T_ecx, hiOperand(Val)); | |
2192 Context.insert(Label); | |
2193 } | |
2194 const bool Locked = true; | |
2195 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | |
2196 _br(InstX8632Br::Br_ne, Label); | |
2197 if (!IsXchg8b) { | |
2198 // If Val is a variable, model the extended live range of Val through | |
2199 // the end of the loop, since it will be re-used by the loop. | |
2200 // Same with the address. | |
2201 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | |
2202 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | |
2203 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | |
2204 Context.insert(InstFakeUse::create(Func, ValLo)); | |
2205 Context.insert(InstFakeUse::create(Func, ValHi)); | |
2206 } | |
2207 Context.insert(InstFakeUse::create(Func, LegalPtr)); | |
2208 } else { | |
2209 // For xchg, just need to extend the live range of ebx/ecx. | |
2210 Context.insert(InstFakeUse::create(Func, T_ebx)); | |
2211 Context.insert(InstFakeUse::create(Func, T_ecx)); | |
2212 } | |
2213 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
2214 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
2215 _mov(DestLo, T_eax); | |
2216 _mov(DestHi, T_edx); | |
2217 return; | |
2218 } | |
2219 // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax, | |
2220 // but that conflicts with the later requirement that we use eax for | |
2221 // cmpxchg. The two infinite weight choices conflict and we end up | |
2222 // choosing eax for both. Work around this for now, by forcing Addr | |
2223 // to use a different register. Arbitrarily picking ecx instead. | |
2224 const bool AllowOverlap = false; | |
2225 Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_ecx); | |
2226 OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty); | |
2227 Variable *T_eax = makeReg(Ty, Reg_eax); | |
2228 _mov(T_eax, Addr); | |
2229 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
2230 Context.insert(Label); | |
2231 Variable *T = NULL; | |
2232 // We want to pick a register for T other than eax, | |
2233 // and ecx is already used for LegalPtr. | |
2234 _mov(T, T_eax, Reg_edx); | |
2235 (this->*Op_Lo)(T, Val); | |
2236 const bool Locked = true; | |
2237 _cmpxchg(Addr, T_eax, T, Locked); | |
2238 _br(InstX8632Br::Br_ne, Label); | |
2239 // If Val is a variable, model the extended live range of Val through | |
2240 // the end of the loop, since it will be re-used by the loop. | |
2241 // Same with the address. | |
2242 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | |
2243 Context.insert(InstFakeUse::create(Func, ValVar)); | |
2244 } | |
2245 Context.insert(InstFakeUse::create(Func, LegalPtr)); | |
2246 _mov(Dest, T_eax); | |
2054 } | 2247 } |
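
At the C++ level, the emitted loop computes a standard load / modify / compare-exchange retry. A minimal sketch of the same semantics for a 32-bit atomic or, using std::atomic for illustration (not the Subzero API):

    #include <atomic>
    #include <cstdint>

    // Returns the old value, retrying until no other thread intervened
    // between the load and the compare-exchange.
    std::uint32_t atomic_or_old(std::atomic<std::uint32_t> &Mem,
                                std::uint32_t Val) {
      std::uint32_t Old = Mem.load();              // mov eax, [ptr]
      while (!Mem.compare_exchange_weak(Old, Old | Val)) {
        // On failure, Old is refreshed with the current memory value,
        // mirroring how cmpxchg leaves the observed value in eax.
      }
      return Old;                                  // mov <dest>, eax
    }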
2055 | 2248 |
2056 namespace { | 2249 namespace { |
2057 | 2250 |
2058 bool isAdd(const Inst *Inst) { | 2251 bool isAdd(const Inst *Inst) { |
2059 if (const InstArithmetic *Arith = | 2252 if (const InstArithmetic *Arith = |
2060 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 2253 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
2061 return (Arith->getOp() == InstArithmetic::Add); | 2254 return (Arith->getOp() == InstArithmetic::Add); |
2062 } | 2255 } |
2063 return false; | 2256 return false; |
(...skipping 450 matching lines...) | |
2514 Reg->setWeightInfinite(); | 2707 Reg->setWeightInfinite(); |
2515 else | 2708 else |
2516 Reg->setRegNum(RegNum); | 2709 Reg->setRegNum(RegNum); |
2517 return Reg; | 2710 return Reg; |
2518 } | 2711 } |
2519 | 2712 |
2520 void TargetX8632::postLower() { | 2713 void TargetX8632::postLower() { |
2521 if (Ctx->getOptLevel() != Opt_m1) | 2714 if (Ctx->getOptLevel() != Opt_m1) |
2522 return; | 2715 return; |
2523 // TODO: Avoid recomputing WhiteList every instruction. | 2716 // TODO: Avoid recomputing WhiteList every instruction. |
2524 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None); | 2717 RegSetMask RegInclude = RegSet_All; |
2718 RegSetMask RegExclude = RegSet_None | RegSet_StackPointer; | |
2719 if (hasFramePointer()) | |
jvoung (off chromium) 2014/07/07 17:31:16: split out -- but currently here so that my tests p
jvoung (off chromium) 2014/07/09 17:07:55: Done.
2720 RegExclude |= RegSet_FramePointer; | |
2721 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude); | |
2525 // Make one pass to black-list pre-colored registers. TODO: If | 2722 // Make one pass to black-list pre-colored registers. TODO: If |
2526 // there was some prior register allocation pass that made register | 2723 // there was some prior register allocation pass that made register |
2527 // assignments, those registers need to be black-listed here as | 2724 // assignments, those registers need to be black-listed here as |
2528 // well. | 2725 // well. |
2529 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | 2726 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; |
2530 ++I) { | 2727 ++I) { |
2531 const Inst *Inst = *I; | 2728 const Inst *Inst = *I; |
2532 if (Inst->isDeleted()) | 2729 if (Inst->isDeleted()) |
2533 continue; | 2730 continue; |
2534 if (llvm::isa<InstFakeKill>(Inst)) | 2731 if (llvm::isa<InstFakeKill>(Inst)) |
(...skipping 146 matching lines...) | |
2681 for (SizeT i = 0; i < Size; ++i) { | 2878 for (SizeT i = 0; i < Size; ++i) { |
2682 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 2879 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
2683 } | 2880 } |
2684 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 2881 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
2685 } | 2882 } |
2686 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 2883 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
2687 << "\n"; | 2884 << "\n"; |
2688 } | 2885 } |
2689 | 2886 |
2690 } // end of namespace Ice | 2887 } // end of namespace Ice |