OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 1898 matching lines...) |
1909 _cmp(Src0New, Src1); | 1909 _cmp(Src0New, Src1); |
1910 _mov(Dest, One); | 1910 _mov(Dest, One); |
1911 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 1911 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
1912 Context.insert(InstFakeUse::create(Func, Dest)); | 1912 Context.insert(InstFakeUse::create(Func, Dest)); |
1913 _mov(Dest, Zero); | 1913 _mov(Dest, Zero); |
1914 Context.insert(Label); | 1914 Context.insert(Label); |
1915 } | 1915 } |
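
The fragment above lowers a 32-bit icmp by setting Dest to 1, branching over the zero-store when the condition holds, and inserting an InstFakeUse to keep Dest live across the branch so the register allocator does not treat the first _mov as dead. A minimal C++ sketch of the value the sequence computes, with "<" standing in for the mapped condition (illustrative only, not Subzero code):

// Illustrative only: the boolean computed by the
// cmp / mov Dest,1 / br <cond>,Label / mov Dest,0 / Label: sequence.
static int icmpLoweredSketch(int Src0, int Src1) {
  int Dest = 1;      // _mov(Dest, One)
  if (Src0 < Src1)   // _cmp(Src0New, Src1) + conditional _br to Label
    return Dest;     // branch taken: Dest keeps 1
  Dest = 0;          // fall-through: _mov(Dest, Zero)
  return Dest;       // Label:
}
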
1916 | 1916 |
1917 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 1917 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
1918 switch (Instr->getIntrinsicInfo().ID) { | 1918 switch (Instr->getIntrinsicInfo().ID) { |
1919 case Intrinsics::AtomicCmpxchg: | 1919 case Intrinsics::AtomicCmpxchg: { |
1920 if (!Intrinsics::VerifyMemoryOrder( | 1920 if (!Intrinsics::VerifyMemoryOrder( |
1921 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | 1921 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { |
1922 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 1922 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
1923 return; | 1923 return; |
1924 } | 1924 } |
1925 if (!Intrinsics::VerifyMemoryOrder( | 1925 if (!Intrinsics::VerifyMemoryOrder( |
1926 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 1926 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
1927 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 1927 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
1928 return; | 1928 return; |
1929 } | 1929 } |
1930 // TODO(jvoung): fill it in. | 1930 Variable *DestPrev = Instr->getDest(); |
1931 Func->setError("Unhandled intrinsic"); | 1931 Operand *PtrToMem = Instr->getArg(0); |
| 1932 Operand *Expected = Instr->getArg(1); |
| 1933 Operand *Desired = Instr->getArg(2); |
| 1934 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
| 1935 // TODO(jvoung): If we peek ahead a few instructions and see how |
| 1936 // DestPrev is used (typically via another compare and branch), |
| 1937 // we may be able to optimize. If the result truly is used by a |
| 1938 // compare + branch, and the comparison is for equality, then we can |
| 1939 // optimize out the later compare, and fuse with the later branch. |
1932 return; | 1940 return; |
| 1941 } |
1933 case Intrinsics::AtomicFence: | 1942 case Intrinsics::AtomicFence: |
1934 if (!Intrinsics::VerifyMemoryOrder( | 1943 if (!Intrinsics::VerifyMemoryOrder( |
1935 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 1944 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
1936 Func->setError("Unexpected memory ordering for AtomicFence"); | 1945 Func->setError("Unexpected memory ordering for AtomicFence"); |
1937 return; | 1946 return; |
1938 } | 1947 } |
1939 _mfence(); | 1948 _mfence(); |
1940 return; | 1949 return; |
1941 case Intrinsics::AtomicFenceAll: | 1950 case Intrinsics::AtomicFenceAll: |
1942 // NOTE: FenceAll should prevent any load/store from being moved | 1951 // NOTE: FenceAll should prevent any load/store from being moved |
(...skipping 181 matching lines...) |
2124 case Intrinsics::Trap: | 2133 case Intrinsics::Trap: |
2125 _ud2(); | 2134 _ud2(); |
2126 return; | 2135 return; |
2127 case Intrinsics::UnknownIntrinsic: | 2136 case Intrinsics::UnknownIntrinsic: |
2128 Func->setError("Should not be lowering UnknownIntrinsic"); | 2137 Func->setError("Should not be lowering UnknownIntrinsic"); |
2129 return; | 2138 return; |
2130 } | 2139 } |
2131 return; | 2140 return; |
2132 } | 2141 } |
2133 | 2142 |
| 2143 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, |
| 2144 Operand *Expected, Operand *Desired) { |
| 2145 if (Expected->getType() == IceType_i64) { |
| 2146 // Reserve the pre-colored registers first, before adding any more |
| 2147 // infinite-weight variables from FormMemoryOperand's legalization. |
| 2148 Variable *T_edx = makeReg(IceType_i32, Reg_edx); |
| 2149 Variable *T_eax = makeReg(IceType_i32, Reg_eax); |
| 2150 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); |
| 2151 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); |
| 2152 _mov(T_eax, loOperand(Expected)); |
| 2153 _mov(T_edx, hiOperand(Expected)); |
| 2154 _mov(T_ebx, loOperand(Desired)); |
| 2155 _mov(T_ecx, hiOperand(Desired)); |
| 2156 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 2157 const bool Locked = true; |
| 2158 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 2159 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 2160 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 2161 _mov(DestLo, T_eax); |
| 2162 _mov(DestHi, T_edx); |
| 2163 return; |
| 2164 } |
| 2165 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); |
| 2166 _mov(T_eax, Expected); |
| 2167 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 2168 Variable *DesiredReg = legalizeToVar(Desired); |
| 2169 const bool Locked = true; |
| 2170 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 2171 _mov(DestPrev, T_eax); |
| 2172 } |
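
As a cross-check on the new function: lock cmpxchg compares eax against the memory operand, stores the desired value only on a match, and always leaves the old memory value in eax, which is why DestPrev is copied out of T_eax (and edx:eax in the 8-byte case). A sequential C++ sketch of that contract (illustrative names, not Subzero code; the hardware performs this atomically):

#include <cstdint>

// Illustrative only: the value-returning compare-and-swap contract
// implemented by lock cmpxchg / lock cmpxchg8b.
static uint32_t casContractSketch(uint32_t *Ptr, uint32_t Expected,
                                  uint32_t Desired) {
  uint32_t Prev = *Ptr;   // old value is always returned (in eax)
  if (Prev == Expected)
    *Ptr = Desired;       // store only on match (ZF set on match)
  return Prev;            // this is what DestPrev receives
}
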
| 2173 |
2134 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 2174 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
2135 Operand *Ptr, Operand *Val) { | 2175 Operand *Ptr, Operand *Val) { |
| 2176 bool NeedsCmpxchg = false; |
| 2177 LowerBinOp Op_Lo = NULL; |
| 2178 LowerBinOp Op_Hi = NULL; |
2136 switch (Operation) { | 2179 switch (Operation) { |
2137 default: | 2180 default: |
2138 Func->setError("Unknown AtomicRMW operation"); | 2181 Func->setError("Unknown AtomicRMW operation"); |
2139 return; | 2182 return; |
2140 case Intrinsics::AtomicAdd: { | 2183 case Intrinsics::AtomicAdd: { |
2141 if (Dest->getType() == IceType_i64) { | 2184 if (Dest->getType() == IceType_i64) { |
2142 // Do a nasty cmpxchg8b loop. Factor this into a function. | 2185 // All the fall-through paths must set this to true, but use this |
2143 // TODO(jvoung): fill it in. | 2186 // for asserting. |
2144 Func->setError("Unhandled AtomicRMW operation"); | 2187 NeedsCmpxchg = true; |
2145 return; | 2188 Op_Lo = &TargetX8632::_add; |
| 2189 Op_Hi = &TargetX8632::_adc; |
| 2190 break; |
2146 } | 2191 } |
2147 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); | 2192 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2148 const bool Locked = true; | 2193 const bool Locked = true; |
2149 Variable *T = NULL; | 2194 Variable *T = NULL; |
2150 _mov(T, Val); | 2195 _mov(T, Val); |
2151 _xadd(Addr, T, Locked); | 2196 _xadd(Addr, T, Locked); |
2152 _mov(Dest, T); | 2197 _mov(Dest, T); |
2153 return; | 2198 return; |
2154 } | 2199 } |
2155 case Intrinsics::AtomicSub: { | 2200 case Intrinsics::AtomicSub: { |
2156 if (Dest->getType() == IceType_i64) { | 2201 if (Dest->getType() == IceType_i64) { |
2157 // Do a nasty cmpxchg8b loop. | 2202 NeedsCmpxchg = true; |
2158 // TODO(jvoung): fill it in. | 2203 Op_Lo = &TargetX8632::_sub; |
2159 Func->setError("Unhandled AtomicRMW operation"); | 2204 Op_Hi = &TargetX8632::_sbb; |
2160 return; | 2205 break; |
2161 } | 2206 } |
2162 // Generate a memory operand from Ptr. | 2207 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2163 // neg... | 2208 const bool Locked = true; |
2164 // Then do the same as AtomicAdd. | 2209 Variable *T = NULL; |
2165 // TODO(jvoung): fill it in. | 2210 _mov(T, Val); |
2166 Func->setError("Unhandled AtomicRMW operation"); | 2211 _neg(T); |
| 2212 _xadd(Addr, T, Locked); |
| 2213 _mov(Dest, T); |
2167 return; | 2214 return; |
2168 } | 2215 } |
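
The AtomicSub fall-through relies on x86 having no locked subtract that returns the old value: negating Val first turns lock xadd into a fetch-and-subtract. A one-line C++ illustration of the identity being used (std::atomic standing in for lock xadd; illustrative only):

#include <atomic>
#include <cstdint>

// Illustrative only: fetch_sub expressed as fetch_add of the
// two's-complement negation, which is what neg + lock xadd computes.
static uint32_t atomicSubViaAddSketch(std::atomic<uint32_t> &Mem,
                                      uint32_t Val) {
  return Mem.fetch_add(-Val); // same old value as Mem.fetch_sub(Val)
}
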
2169 case Intrinsics::AtomicOr: | 2216 case Intrinsics::AtomicOr: |
| 2217 // TODO(jvoung): If Dest is null or dead, then some of these |
| 2218 // operations do not need an "exchange", but just a locked op. |
| 2219 // That appears to be "worth" it for sub, or, and, and xor. |
| 2220 // xadd is probably fine vs lock add for add, and xchg is fine |
| 2221 // vs an atomic store. |
| 2222 NeedsCmpxchg = true; |
| 2223 Op_Lo = &TargetX8632::_or; |
| 2224 Op_Hi = &TargetX8632::_or; |
| 2225 break; |
2170 case Intrinsics::AtomicAnd: | 2226 case Intrinsics::AtomicAnd: |
| 2227 NeedsCmpxchg = true; |
| 2228 Op_Lo = &TargetX8632::_and; |
| 2229 Op_Hi = &TargetX8632::_and; |
| 2230 break; |
2171 case Intrinsics::AtomicXor: | 2231 case Intrinsics::AtomicXor: |
| 2232 NeedsCmpxchg = true; |
| 2233 Op_Lo = &TargetX8632::_xor; |
| 2234 Op_Hi = &TargetX8632::_xor; |
| 2235 break; |
2172 case Intrinsics::AtomicExchange: | 2236 case Intrinsics::AtomicExchange: |
2173 // TODO(jvoung): fill it in. | 2237 if (Dest->getType() == IceType_i64) { |
2174 Func->setError("Unhandled AtomicRMW operation"); | 2238 NeedsCmpxchg = true; |
| 2239 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values |
| 2240 // just need to be moved to the ecx and ebx registers. |
| 2241 Op_Lo = NULL; |
| 2242 Op_Hi = NULL; |
| 2243 break; |
| 2244 } |
| 2245 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
| 2246 Variable *T = NULL; |
| 2247 _mov(T, Val); |
| 2248 _xchg(Addr, T); |
| 2249 _mov(Dest, T); |
2175 return; | 2250 return; |
2176 } | 2251 } |
| 2252 // Otherwise, we need a cmpxchg loop. |
| 2253 assert(NeedsCmpxchg); |
| 2254 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
| 2255 } |
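
Op_Lo/Op_Hi are pointers to the per-instruction emitter members (_add/_adc, _sub/_sbb, and so on), so a single cmpxchg loop body can emit any of the binary operations. A self-contained sketch of that C++ pointer-to-member dispatch pattern (the real LowerBinOp typedef lives in the target header; the toy one here is an assumption for illustration):

// Illustrative only: the pointer-to-member dispatch used by
// expandAtomicRMWAsCmpxchg, reduced to a toy emitter.
struct EmitterSketch {
  typedef void (EmitterSketch::*LowerBinOp)(int &Dest, int Src);
  void _add(int &Dest, int Src) { Dest += Src; }
  void _sub(int &Dest, int Src) { Dest -= Src; }
  void apply(LowerBinOp Op, int &Dest, int Src) {
    (this->*Op)(Dest, Src); // same call syntax as (this->*Op_Lo)(T, Val)
  }
};
// Usage: EmitterSketch E; int V = 5; E.apply(&EmitterSketch::_add, V, 2);
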
| 2256 |
| 2257 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, |
| 2258 Variable *Dest, Operand *Ptr, |
| 2259 Operand *Val) { |
| 2260 // Expand a more complex RMW operation as a cmpxchg loop: |
| 2261 // For 64-bit: |
| 2262 // mov eax, [ptr] |
| 2263 // mov edx, [ptr + 4] |
| 2264 // .LABEL: |
| 2265 // mov ebx, eax |
| 2266 // <Op_Lo> ebx, <desired_adj_lo> |
| 2267 // mov ecx, edx |
| 2268 // <Op_Hi> ecx, <desired_adj_hi> |
| 2269 // lock cmpxchg8b [ptr] |
| 2270 // jne .LABEL |
| 2271 // mov <dest_lo>, eax |
| 2272 // mov <dest_hi>, edx |
| 2273 // |
| 2274 // For 32-bit: |
| 2275 // mov eax, [ptr] |
| 2276 // .LABEL: |
| 2277 // mov <reg>, eax |
| 2278 // op <reg>, [desired_adj] |
| 2279 // lock cmpxchg [ptr], <reg> |
| 2280 // jne .LABEL |
| 2281 // mov <dest>, eax |
| 2282 // |
| 2283 // If Op_{Lo,Hi} are NULL, then just copy the value. |
| 2284 Val = legalize(Val); |
| 2285 Type Ty = Val->getType(); |
| 2286 if (Ty == IceType_i64) { |
| 2287 Variable *T_edx = makeReg(IceType_i32, Reg_edx); |
| 2288 Variable *T_eax = makeReg(IceType_i32, Reg_eax); |
| 2289 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
| 2290 _mov(T_eax, loOperand(Addr)); |
| 2291 _mov(T_edx, hiOperand(Addr)); |
| 2292 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); |
| 2293 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); |
| 2294 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 2295 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; |
| 2296 if (!IsXchg8b) { |
| 2297 Context.insert(Label); |
| 2298 _mov(T_ebx, T_eax); |
| 2299 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
| 2300 _mov(T_ecx, T_edx); |
| 2301 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
| 2302 } else { |
| 2303 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
| 2304 // It just needs the Val loaded into ebx and ecx. |
| 2305 // That can also be done before the loop. |
| 2306 _mov(T_ebx, loOperand(Val)); |
| 2307 _mov(T_ecx, hiOperand(Val)); |
| 2308 Context.insert(Label); |
| 2309 } |
| 2310 const bool Locked = true; |
| 2311 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 2312 _br(InstX8632Br::Br_ne, Label); |
| 2313 if (!IsXchg8b) { |
| 2314 // If Val is a variable, model the extended live range of Val through |
| 2315 // the end of the loop, since it will be re-used by the loop. |
| 2316 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 2317 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
| 2318 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
| 2319 Context.insert(InstFakeUse::create(Func, ValLo)); |
| 2320 Context.insert(InstFakeUse::create(Func, ValHi)); |
| 2321 } |
| 2322 } else { |
| 2323 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
| 2324 Context.insert(InstFakeUse::create(Func, T_ebx)); |
| 2325 Context.insert(InstFakeUse::create(Func, T_ecx)); |
| 2326 } |
| 2327 // The address base is also reused in the loop. |
| 2328 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
| 2329 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2330 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2331 _mov(DestLo, T_eax); |
| 2332 _mov(DestHi, T_edx); |
| 2333 return; |
| 2334 } |
| 2335 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
| 2336 Variable *T_eax = makeReg(Ty, Reg_eax); |
| 2337 _mov(T_eax, Addr); |
| 2338 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 2339 Context.insert(Label); |
| 2340 // We want to pick a different register for T than Eax, so don't use |
| 2341 // _mov(T == NULL, T_eax). |
| 2342 Variable *T = makeReg(Ty); |
| 2343 _mov(T, T_eax); |
| 2344 (this->*Op_Lo)(T, Val); |
| 2345 const bool Locked = true; |
| 2346 _cmpxchg(Addr, T_eax, T, Locked); |
| 2347 _br(InstX8632Br::Br_ne, Label); |
| 2348 // If Val is a variable, model the extended live range of Val through |
| 2349 // the end of the loop, since it will be re-used by the loop. |
| 2350 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 2351 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 2352 } |
| 2353 // The address base is also reused in the loop. |
| 2354 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
| 2355 _mov(Dest, T_eax); |
2177 } | 2356 } |
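
Stepping back, the expansion above is the standard load / modify / compare-exchange retry loop; cmpxchg conveniently reloads eax with the current memory value on failure, so the loop needs no explicit re-read. A hedged C++ equivalent of what the emitted 32-bit loop computes (std::atomic stands in for lock cmpxchg; illustrative only):

#include <atomic>
#include <cstdint>

// Illustrative only: the retry loop emitted as
//   mov eax,[ptr] / .LABEL: mov reg,eax / op reg,val /
//   lock cmpxchg [ptr],reg / jne .LABEL / mov dest,eax
static uint32_t rmwLoopSketch(std::atomic<uint32_t> &Mem, uint32_t Val,
                              uint32_t (*Op)(uint32_t, uint32_t)) {
  uint32_t Old = Mem.load();
  // On failure, compare_exchange_weak refreshes Old with the current
  // value, mirroring cmpxchg leaving the memory value in eax.
  while (!Mem.compare_exchange_weak(Old, Op(Old, Val))) {
  }
  return Old; // Dest receives the pre-operation value
}
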
2178 | 2357 |
2179 namespace { | 2358 namespace { |
2180 | 2359 |
2181 bool isAdd(const Inst *Inst) { | 2360 bool isAdd(const Inst *Inst) { |
2182 if (const InstArithmetic *Arith = | 2361 if (const InstArithmetic *Arith = |
2183 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 2362 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
2184 return (Arith->getOp() == InstArithmetic::Add); | 2363 return (Arith->getOp() == InstArithmetic::Add); |
2185 } | 2364 } |
2186 return false; | 2365 return false; |
(...skipping 651 matching lines...) |
2838 for (SizeT i = 0; i < Size; ++i) { | 3017 for (SizeT i = 0; i < Size; ++i) { |
2839 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3018 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
2840 } | 3019 } |
2841 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3020 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
2842 } | 3021 } |
2843 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3022 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
2844 << "\n"; | 3023 << "\n"; |
2845 } | 3024 } |
2846 | 3025 |
2847 } // end of namespace Ice | 3026 } // end of namespace Ice |