OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 14 matching lines...) Expand all Loading... | |
25 #include "IceInstVarIter.h" | 25 #include "IceInstVarIter.h" |
26 #include "IceLiveness.h" | 26 #include "IceLiveness.h" |
27 #include "IceOperand.h" | 27 #include "IceOperand.h" |
28 #include "IcePhiLoweringImpl.h" | 28 #include "IcePhiLoweringImpl.h" |
29 #include "IceRegistersARM32.h" | 29 #include "IceRegistersARM32.h" |
30 #include "IceTargetLoweringARM32.def" | 30 #include "IceTargetLoweringARM32.def" |
31 #include "IceUtils.h" | 31 #include "IceUtils.h" |
32 #include "llvm/Support/MathExtras.h" | 32 #include "llvm/Support/MathExtras.h" |
33 | 33 |
34 #include <algorithm> | 34 #include <algorithm> |
35 #include <array> | |
35 #include <utility> | 36 #include <utility> |
36 | 37 |
37 namespace Ice { | 38 namespace Ice { |
38 | 39 |
39 namespace { | 40 namespace { |
40 | 41 |
41 // The following table summarizes the logic for lowering the icmp instruction | 42 // The following table summarizes the logic for lowering the icmp instruction |
42 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 43 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
43 // ARM32 conditional move instruction. | 44 // ARM32 conditional move instruction. |
44 | 45 |
(...skipping 1984 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2029 case InstArithmetic::Udiv: | 2030 case InstArithmetic::Udiv: |
2030 case InstArithmetic::Sdiv: | 2031 case InstArithmetic::Sdiv: |
2031 case InstArithmetic::Urem: | 2032 case InstArithmetic::Urem: |
2032 case InstArithmetic::Srem: | 2033 case InstArithmetic::Srem: |
2033 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " | 2034 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " |
2034 "should have already been handled before"); | 2035 "should have already been handled before"); |
2035 return; | 2036 return; |
2036 } | 2037 } |
2037 } | 2038 } |
2038 | 2039 |
2040 namespace { | |
2041 // StrengthReduction is a namespace with the strength reduction machinery. The | |
2042 // entry point is the StrengthReduction::tryToOptimize method. It returns true | |
2043 // if the optimization can be performed, and false otherwise. | |
2044 // | |
2045 // If the optimization can be performed, tryToOptimize sets its NumOperations | |
2046 // parameter to the number of shifts that are needed to perform the | |
2047 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub> | |
2048 // tuples that describe how to materialize the multiplication. | |
2049 // | |
2050 // The algorithm finds contiguous 1s in the Multiplication source, and uses one | |
2052 // or two shifts to materialize it. A sequence of 1s, e.g.
Jim Stichnoth
2015/11/24 19:32:20
e.g. instead of i.e., right?
John
2015/11/24 20:26:55
Done.
| |
2052 // | |
2053 // M N | |
2054 // ...00000000000011111...111110000000... | |
2055 // | |
2056 // is materializable with (1 << (M + 1)) - (1 << N): | |
2057 // | |
2058 // ...00000000000100000...000000000000... [1 << (M + 1)] | |
2059 // ...00000000000000000...000010000000... (-) [1 << N] | |
2060 // -------------------------------------- | |
2061 // ...00000000000011111...111110000000... | |
2062 // | |
2063 // And a single bit set is materializable with just a left shift. |
2064 namespace StrengthReduction { | |
2065 enum AggregationOperation { | |
2066 AO_Invalid, | |
2067 AO_Add, | |
2068 AO_Sub, | |
2069 }; | |
2070 | |
2071 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple. |
2072 class AggregationElement { | |
2073 AggregationElement(const AggregationElement &) = delete; | |
2074 | |
2075 public: | |
2076 AggregationElement() = default; | |
2077 AggregationElement &operator=(const AggregationElement &) = default; | |
2078 AggregationElement(AggregationOperation Op, uint32_t ShAmt) | |
2079 : Op(Op), ShAmt(ShAmt) {} | |
2080 | |
2081 Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const { | |
2082 assert(OpR->mustHaveReg()); | |
2083 if (ShAmt == 0) { | |
2084 return OpR; | |
2085 } | |
2086 return OperandARM32FlexReg::create( | |
2087 Func, IceType_i32, OpR, OperandARM32::LSL, | |
2088 OperandARM32ShAmtImm::create( | |
2089 Func, llvm::cast<ConstantInteger32>( | |
2090 Func->getContext()->getConstantInt32(ShAmt)))); | |
2091 } | |
2092 | |
2093 bool aggregateWithAdd() const { | |
2094 switch (Op) { | |
2095 case AO_Invalid: | |
2096 llvm::report_fatal_error("Invalid Strength Reduction Operations."); | |
2097 case AO_Add: | |
2098 return true; | |
2099 case AO_Sub: | |
2100 return false; | |
2101 } | |
2102 } | |
2103 | |
2104 uint32_t shAmt() const { return ShAmt; } | |
2105 | |
2106 private: | |
2107 AggregationOperation Op = AO_Invalid; | |
2108 uint32_t ShAmt = 33; | |
Jim Stichnoth
2015/11/24 19:32:20
I assumed 33 is meant to be an invalid value when
John
2015/11/24 20:26:55
This used to matter when this element was a const.
| |
2109 }; | |
2110 | |
2111 template <std::size_t N> | |
2112 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations, | |
Jim Stichnoth
2015/11/24 19:32:20
I think it might be helpful to document that [Rang
John
2015/11/24 20:26:55
Done.
| |
2113 std::array<AggregationElement, N> *Operations) { | |
2114 assert(*NumOperations < N); | |
2115 if (RangeStart == RangeEnd) { | |
2116 // Single bit set: | |
2117 // Src : 0...00010... | |
2118 // RangeStart : ^ | |
2119 // RangeEnd : ^ | |
2120 // NegSrc : 0...00001... | |
2121 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart); | |
2122 ++(*NumOperations); | |
2123 return true; | |
2124 } | |
2125 | |
2126 // Sequence of 1s: (two operations required.) | |
2127 // Src : 0...00011...110... | |
2128 // RangeStart : ^ | |
2129 // RangeEnd : ^ | |
2130 // NegSrc : 0...00000...001... | |
2131 if (*NumOperations + 1 >= N) { | |
2132 return false; | |
2133 } | |
2134 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1); | |
2135 ++(*NumOperations); | |
2136 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd); | |
2137 ++(*NumOperations); | |
2138 return true; | |
2139 } | |
2140 | |
2141 // tryToOptimize scans Src looking for sequences of 1s (including the unitary |
2142 // bit 1 surrounded by zeroes). |
2143 template <std::size_t N> | |
2144 bool tryToOptimize(uint32_t Src, SizeT *NumOperations, | |
2145 std::array<AggregationElement, N> *Operations) { | |
2146 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT; | |
2147 uint32_t NegSrc = ~Src; | |
2148 | |
2149 *NumOperations = 0; | |
2150 while (Src != 0 && *NumOperations < N) { | |
2151 // Each step of the algorithm: | |
2152 // * finds L, the last bit set in Src; | |
2153 //   * clears all the upper bits in NegSrc up to bit L; |
Jim Stichnoth
2015/11/24 19:32:20
up to bit
John
2015/11/24 20:26:55
Done.
| |
2154 // * finds nL, the last bit set in NegSrc; | |
2155 // * clears all the upper bits in Src up to bit nL; | |
2156 // | |
2157 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence | |
2158 // of 1s starting at L, and ending at nL + 1, was found. |
Jim Stichnoth
2015/11/24 19:32:20
ending at
John
2015/11/24 20:26:55
Done.
| |
2159 const uint32_t SrcLastBitSet = llvm::findLastSet(Src); | |
2160 const uint32_t NegSrcClearMask = | |
2161 (SrcLastBitSet == 0) ? 0 | |
2162 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet); | |
2163 NegSrc &= NegSrcClearMask; | |
2164 if (NegSrc == 0) { | |
2165 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) { | |
2166 return true; | |
2167 } | |
2168 return false; | |
2169 } | |
2170 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc); | |
2171 assert(NegSrcLastBitSet < SrcLastBitSet); | |
2172 const uint32_t SrcClearMask = | |
2173 (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >> | |
2174 (SrcSizeBits - NegSrcLastBitSet); | |
2175 Src &= SrcClearMask; | |
2176 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations, | |
2177 Operations)) { | |
2178 return false; | |
2179 } | |
2180 } | |
2181 | |
2182 return Src == 0; | |
2183 } | |
2184 } // end of namespace StrengthReduction | |
2185 } // end of anonymous namespace | |
2186 | |
2039 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 2187 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
2040 Variable *Dest = Inst->getDest(); | 2188 Variable *Dest = Inst->getDest(); |
2041 | 2189 |
2042 if (Dest->isRematerializable()) { | 2190 if (Dest->isRematerializable()) { |
2043 Context.insert(InstFakeDef::create(Func, Dest)); | 2191 Context.insert(InstFakeDef::create(Func, Dest)); |
2044 return; | 2192 return; |
2045 } | 2193 } |
2046 | 2194 |
2047 if (Dest->getType() == IceType_i1) { | 2195 if (Dest->getType() == IceType_i1) { |
2048 lowerInt1Arithmetic(Inst); | 2196 lowerInt1Arithmetic(Inst); |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2198 } | 2346 } |
2199 case InstArithmetic::Xor: { | 2347 case InstArithmetic::Xor: { |
2200 Variable *Src0R = Srcs.src0R(this); | 2348 Variable *Src0R = Srcs.src0R(this); |
2201 Operand *Src1RF = Srcs.src1RF(this); | 2349 Operand *Src1RF = Srcs.src1RF(this); |
2202 _eor(T, Src0R, Src1RF); | 2350 _eor(T, Src0R, Src1RF); |
2203 _mov(Dest, T); | 2351 _mov(Dest, T); |
2204 return; | 2352 return; |
2205 } | 2353 } |
2206 case InstArithmetic::Sub: { | 2354 case InstArithmetic::Sub: { |
2207 if (Srcs.hasConstOperand()) { | 2355 if (Srcs.hasConstOperand()) { |
2208 // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed | |
2209 // to be used. | |
2210 if (Srcs.immediateIsFlexEncodable()) { | 2356 if (Srcs.immediateIsFlexEncodable()) { |
2211 Variable *Src0R = Srcs.src0R(this); | 2357 Variable *Src0R = Srcs.src0R(this); |
2212 Operand *Src1RF = Srcs.src1RF(this); | 2358 Operand *Src1RF = Srcs.src1RF(this); |
2213 if (Srcs.swappedOperands()) { | 2359 if (Srcs.swappedOperands()) { |
2214 _rsb(T, Src0R, Src1RF); | 2360 _rsb(T, Src0R, Src1RF); |
2215 } else { | 2361 } else { |
2216 _sub(T, Src0R, Src1RF); | 2362 _sub(T, Src0R, Src1RF); |
2217 } | 2363 } |
2218 _mov(Dest, T); | 2364 _mov(Dest, T); |
2219 return; | 2365 return; |
2220 } | 2366 } |
2221 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { | 2367 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { |
2222 Variable *Src0R = Srcs.src0R(this); | 2368 Variable *Src0R = Srcs.src0R(this); |
2223 Operand *Src1F = Srcs.negatedSrc1F(this); | 2369 Operand *Src1F = Srcs.negatedSrc1F(this); |
2224 _add(T, Src0R, Src1F); | 2370 _add(T, Src0R, Src1F); |
2225 _mov(Dest, T); | 2371 _mov(Dest, T); |
2226 return; | 2372 return; |
2227 } | 2373 } |
2228 } | 2374 } |
2229 Variable *Src0R = Srcs.unswappedSrc0R(this); | 2375 Variable *Src0R = Srcs.unswappedSrc0R(this); |
2230 Variable *Src1R = Srcs.unswappedSrc1R(this); | 2376 Variable *Src1R = Srcs.unswappedSrc1R(this); |
2231 _sub(T, Src0R, Src1R); | 2377 _sub(T, Src0R, Src1R); |
2232 _mov(Dest, T); | 2378 _mov(Dest, T); |
2233 return; | 2379 return; |
2234 } | 2380 } |
2235 case InstArithmetic::Mul: { | 2381 case InstArithmetic::Mul: { |
2382 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; | |
Jim Stichnoth
2015/11/24 19:32:20
In IceTargetLoweringX86BaseImpl.h, strength-reduce
John
2015/11/24 20:26:55
In this case, I ran ammp with 4 and 5 shifts, and
| |
2383 if (!OptM1 && Srcs.hasConstOperand()) { | |
2384 constexpr std::size_t MaxShifts = 4; | |
2385 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts; | |
2386 SizeT NumOperations; | |
2387 int32_t Const = Srcs.getConstantValue(); | |
2388 const bool Invert = Const < 0; | |
2389 const bool MultiplyByZero = Const == 0; | |
2390 Operand *_0 = legalize(Ctx->getConstantInt32(0), Legal_Reg | Legal_Flex); | |
Jim Stichnoth
2015/11/24 19:32:20
Instead of getConstantInt32(0), it may be better t
John
2015/11/24 20:26:55
Done.
| |
2391 | |
2392 if (MultiplyByZero) { | |
2393 _mov(T, _0); | |
2394 _mov(Dest, T); | |
2395 return; | |
2396 } | |
2397 | |
2398 if (Invert) { | |
2399 Const = -Const; | |
2400 } | |
2401 | |
2402 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) { | |
2403 assert(NumOperations >= 1); | |
2404 Variable *Src0R = Srcs.src0R(this); | |
2405 int32_t Start; | |
2406 int32_t End; | |
2407 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) { | |
2408 // Multiplication by a power of 2 (NumOperations == 1); or | |
2409 // Multiplication by an even number that is not a power of 2. |
2410 Start = 1; | |
2411 End = NumOperations; | |
2412 assert(Shifts[0].aggregateWithAdd()); | |
2413 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt())); | |
2414 } else { | |
2415 // Multiplication by an odd number. Put the free barrel shifter to a | |
2416 // good use. | |
2417 Start = 0; | |
2418 End = NumOperations - 2; | |
2419 const StrengthReduction::AggregationElement &Last = | |
2420 Shifts[NumOperations - 1]; | |
2421 const StrengthReduction::AggregationElement &SecondToLast = | |
2422 Shifts[NumOperations - 2]; | |
2423 if (!Last.aggregateWithAdd()) { | |
2424 assert(SecondToLast.aggregateWithAdd()); | |
2425 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | |
2426 } else if (!SecondToLast.aggregateWithAdd()) { | |
2427 assert(Last.aggregateWithAdd()); | |
2428 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | |
2429 } else { | |
2430 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); | |
2431 } | |
2432 } | |
2433 | |
2434 // Odd numbers : S E I I | |
2435 // +---+---+---+---+---+---+ ... +---+---+---+---+ | |
2436 // Shifts = | | | | | | | ... | | | | | | |
2437 // +---+---+---+---+---+---+ ... +---+---+---+---+ | |
2438 // Even numbers: I S E | |
2439 // | |
2440 // S: Start; E: End; I: Init | |
2441 for (int32_t I = Start; I < End; ++I) { | |
2442 const StrengthReduction::AggregationElement &Current = Shifts[I]; | |
2443 Operand *SrcF = Current.createShiftedOperand(Func, Src0R); | |
2444 if (Current.aggregateWithAdd()) { | |
2445 _add(T, T, SrcF); | |
2446 } else { | |
2447 _sub(T, T, SrcF); | |
2448 } | |
2449 } | |
2450 | |
2451 if (Invert) { | |
2452 // T = 0 - T. | |
2453 _rsb(T, T, _0); | |
2454 } | |
2455 | |
2456 _mov(Dest, T); | |
2457 return; | |
2458 } | |
2459 } | |
2236 Variable *Src0R = Srcs.unswappedSrc0R(this); | 2460 Variable *Src0R = Srcs.unswappedSrc0R(this); |
2237 Variable *Src1R = Srcs.unswappedSrc1R(this); | 2461 Variable *Src1R = Srcs.unswappedSrc1R(this); |
2238 _mul(T, Src0R, Src1R); | 2462 _mul(T, Src0R, Src1R); |
2239 _mov(Dest, T); | 2463 _mov(Dest, T); |
2240 return; | 2464 return; |
2241 } | 2465 } |
2242 case InstArithmetic::Shl: { | 2466 case InstArithmetic::Shl: { |
2243 Variable *Src0R = Srcs.unswappedSrc0R(this); | 2467 Variable *Src0R = Srcs.unswappedSrc0R(this); |
2244 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); | 2468 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); |
2245 _lsl(T, Src0R, Src1R); | 2469 _lsl(T, Src0R, Src1R); |
(...skipping 3257 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5503 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 5727 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
5504 // However, for compatibility with current NaCl LLVM, don't claim that. | 5728 // However, for compatibility with current NaCl LLVM, don't claim that. |
5505 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 5729 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
5506 } | 5730 } |
5507 | 5731 |
5508 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; | 5732 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; |
5509 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 5733 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
5510 llvm::SmallBitVector TargetARM32::ScratchRegs; | 5734 llvm::SmallBitVector TargetARM32::ScratchRegs; |
5511 | 5735 |
5512 } // end of namespace Ice | 5736 } // end of namespace Ice |
OLD | NEW |