Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1469113003: Subzero. ARM32. Strength reduce multiplications. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Modified arith.ll to test the strength reduction output. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/arith.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 14 matching lines...) Expand all
25 #include "IceInstVarIter.h" 25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h" 26 #include "IceLiveness.h"
27 #include "IceOperand.h" 27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h" 28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h" 29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def" 30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h" 31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h" 32 #include "llvm/Support/MathExtras.h"
33 33
34 #include <algorithm> 34 #include <algorithm>
35 #include <array>
35 #include <utility> 36 #include <utility>
36 37
37 namespace Ice { 38 namespace Ice {
38 39
39 namespace { 40 namespace {
40 41
41 // The following table summarizes the logic for lowering the icmp instruction 42 // The following table summarizes the logic for lowering the icmp instruction
42 // for i32 and narrower types. Each icmp condition has a clear mapping to an 43 // for i32 and narrower types. Each icmp condition has a clear mapping to an
43 // ARM32 conditional move instruction. 44 // ARM32 conditional move instruction.
44 45
(...skipping 1984 matching lines...) Expand 10 before | Expand all | Expand 10 after
2029 case InstArithmetic::Udiv: 2030 case InstArithmetic::Udiv:
2030 case InstArithmetic::Sdiv: 2031 case InstArithmetic::Sdiv:
2031 case InstArithmetic::Urem: 2032 case InstArithmetic::Urem:
2032 case InstArithmetic::Srem: 2033 case InstArithmetic::Srem:
2033 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " 2034 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2034 "should have already been handled before"); 2035 "should have already been handled before");
2035 return; 2036 return;
2036 } 2037 }
2037 } 2038 }
2038 2039
2040 namespace {
2041 // StrengthReduction is a namespace with the strength reduction machinery. The
2042 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2043 // if the optimization can be performed, and false otherwise.
2044 //
2045 // If the optimization can be performed, tryToOptimize sets its NumOperations
2046 // parameter to the number of shifts that are needed to perform the
2047 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2048 // tuples that describe how to materialize the multiplication.
2049 //
2050 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2051 // or two shifts to materialize it. A sequence of 1s, i.e.
Jim Stichnoth 2015/11/24 19:32:20 e.g. instead of i.e., right?
John 2015/11/24 20:26:55 Done.
2052 //
2053 // M N
2054 // ...00000000000011111...111110000000...
2055 //
2056 // is materializable with (1 << (M + 1)) - (1 << N):
2057 //
2058 // ...00000000000100000...000000000000... [1 << (M + 1)]
2059 // ...00000000000000000...000010000000... (-) [1 << N]
2060 // --------------------------------------
2061 // ...00000000000011111...111110000000...
2062 //
2063 // And a single bit set, i.e., is just a left shift.
2064 namespace StrengthReduction {
2065 enum AggregationOperation {
2066 AO_Invalid,
2067 AO_Add,
2068 AO_Sub,
2069 };
2070
2071 // AggregateElement is a glorified <ShAmt, AddOrSub> tuple.
2072 class AggregationElement {
2073 AggregationElement(const AggregationElement &) = delete;
2074
2075 public:
2076 AggregationElement() = default;
2077 AggregationElement &operator=(const AggregationElement &) = default;
2078 AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2079 : Op(Op), ShAmt(ShAmt) {}
2080
2081 Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2082 assert(OpR->mustHaveReg());
2083 if (ShAmt == 0) {
2084 return OpR;
2085 }
2086 return OperandARM32FlexReg::create(
2087 Func, IceType_i32, OpR, OperandARM32::LSL,
2088 OperandARM32ShAmtImm::create(
2089 Func, llvm::cast<ConstantInteger32>(
2090 Func->getContext()->getConstantInt32(ShAmt))));
2091 }
2092
2093 bool aggregateWithAdd() const {
2094 switch (Op) {
2095 case AO_Invalid:
2096 llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2097 case AO_Add:
2098 return true;
2099 case AO_Sub:
2100 return false;
2101 }
2102 }
2103
2104 uint32_t shAmt() const { return ShAmt; }
2105
2106 private:
2107 AggregationOperation Op = AO_Invalid;
2108 uint32_t ShAmt = 33;
Jim Stichnoth 2015/11/24 19:32:20 I assumed 33 is meant to be an invalid value when
John 2015/11/24 20:26:55 This used to matter when this element was a const.
2109 };
2110
2111 template <std::size_t N>
2112 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
Jim Stichnoth 2015/11/24 19:32:20 I think it might be helpful to document that [Rang
John 2015/11/24 20:26:55 Done.
2113 std::array<AggregationElement, N> *Operations) {
2114 assert(*NumOperations < N);
2115 if (RangeStart == RangeEnd) {
2116 // Single bit set:
2117 // Src : 0...00010...
2118 // RangeStart : ^
2119 // RangeEnd : ^
2120 // NegSrc : 0...00001...
2121 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
2122 ++(*NumOperations);
2123 return true;
2124 }
2125
2126 // Sequence of 1s: (two operations required.)
2127 // Src : 0...00011...110...
2128 // RangeStart : ^
2129 // RangeEnd : ^
2130 // NegSrc : 0...00000...001...
2131 if (*NumOperations + 1 >= N) {
2132 return false;
2133 }
2134 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
2135 ++(*NumOperations);
2136 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
2137 ++(*NumOperations);
2138 return true;
2139 }
2140
2141 // tryToOptmize scans Src looking for sequences of 1s (including the unitary bit
2142 // 1 surrounded by zeroes.
2143 template <std::size_t N>
2144 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
2145 std::array<AggregationElement, N> *Operations) {
2146 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
2147 uint32_t NegSrc = ~Src;
2148
2149 *NumOperations = 0;
2150 while (Src != 0 && *NumOperations < N) {
2151 // Each step of the algorithm:
2152 // * finds L, the last bit set in Src;
2153 // * clears all the upper bits in NegSrc up bit L;
Jim Stichnoth 2015/11/24 19:32:20 up to bit
John 2015/11/24 20:26:55 Done.
2154 // * finds nL, the last bit set in NegSrc;
2155 // * clears all the upper bits in Src up to bit nL;
2156 //
2157 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
2158 // of 1s starting at L, and ending a nL + 1, was found.
Jim Stichnoth 2015/11/24 19:32:20 ending at
John 2015/11/24 20:26:55 Done.
2159 const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
2160 const uint32_t NegSrcClearMask =
2161 (SrcLastBitSet == 0) ? 0
2162 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
2163 NegSrc &= NegSrcClearMask;
2164 if (NegSrc == 0) {
2165 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
2166 return true;
2167 }
2168 return false;
2169 }
2170 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
2171 assert(NegSrcLastBitSet < SrcLastBitSet);
2172 const uint32_t SrcClearMask =
2173 (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >>
2174 (SrcSizeBits - NegSrcLastBitSet);
2175 Src &= SrcClearMask;
2176 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
2177 Operations)) {
2178 return false;
2179 }
2180 }
2181
2182 return Src == 0;
2183 }
2184 } // end of namespace StrengthReduction
2185 } // end of anonymous namespace
2186
2039 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 2187 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
2040 Variable *Dest = Inst->getDest(); 2188 Variable *Dest = Inst->getDest();
2041 2189
2042 if (Dest->isRematerializable()) { 2190 if (Dest->isRematerializable()) {
2043 Context.insert(InstFakeDef::create(Func, Dest)); 2191 Context.insert(InstFakeDef::create(Func, Dest));
2044 return; 2192 return;
2045 } 2193 }
2046 2194
2047 if (Dest->getType() == IceType_i1) { 2195 if (Dest->getType() == IceType_i1) {
2048 lowerInt1Arithmetic(Inst); 2196 lowerInt1Arithmetic(Inst);
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
2198 } 2346 }
2199 case InstArithmetic::Xor: { 2347 case InstArithmetic::Xor: {
2200 Variable *Src0R = Srcs.src0R(this); 2348 Variable *Src0R = Srcs.src0R(this);
2201 Operand *Src1RF = Srcs.src1RF(this); 2349 Operand *Src1RF = Srcs.src1RF(this);
2202 _eor(T, Src0R, Src1RF); 2350 _eor(T, Src0R, Src1RF);
2203 _mov(Dest, T); 2351 _mov(Dest, T);
2204 return; 2352 return;
2205 } 2353 }
2206 case InstArithmetic::Sub: { 2354 case InstArithmetic::Sub: {
2207 if (Srcs.hasConstOperand()) { 2355 if (Srcs.hasConstOperand()) {
2208 // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
2209 // to be used.
2210 if (Srcs.immediateIsFlexEncodable()) { 2356 if (Srcs.immediateIsFlexEncodable()) {
2211 Variable *Src0R = Srcs.src0R(this); 2357 Variable *Src0R = Srcs.src0R(this);
2212 Operand *Src1RF = Srcs.src1RF(this); 2358 Operand *Src1RF = Srcs.src1RF(this);
2213 if (Srcs.swappedOperands()) { 2359 if (Srcs.swappedOperands()) {
2214 _rsb(T, Src0R, Src1RF); 2360 _rsb(T, Src0R, Src1RF);
2215 } else { 2361 } else {
2216 _sub(T, Src0R, Src1RF); 2362 _sub(T, Src0R, Src1RF);
2217 } 2363 }
2218 _mov(Dest, T); 2364 _mov(Dest, T);
2219 return; 2365 return;
2220 } 2366 }
2221 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { 2367 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2222 Variable *Src0R = Srcs.src0R(this); 2368 Variable *Src0R = Srcs.src0R(this);
2223 Operand *Src1F = Srcs.negatedSrc1F(this); 2369 Operand *Src1F = Srcs.negatedSrc1F(this);
2224 _add(T, Src0R, Src1F); 2370 _add(T, Src0R, Src1F);
2225 _mov(Dest, T); 2371 _mov(Dest, T);
2226 return; 2372 return;
2227 } 2373 }
2228 } 2374 }
2229 Variable *Src0R = Srcs.unswappedSrc0R(this); 2375 Variable *Src0R = Srcs.unswappedSrc0R(this);
2230 Variable *Src1R = Srcs.unswappedSrc1R(this); 2376 Variable *Src1R = Srcs.unswappedSrc1R(this);
2231 _sub(T, Src0R, Src1R); 2377 _sub(T, Src0R, Src1R);
2232 _mov(Dest, T); 2378 _mov(Dest, T);
2233 return; 2379 return;
2234 } 2380 }
2235 case InstArithmetic::Mul: { 2381 case InstArithmetic::Mul: {
2382 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
Jim Stichnoth 2015/11/24 19:32:20 In IceTargetLoweringX86BaseImpl.h, strength-reduce
John 2015/11/24 20:26:55 In this case, I ran ammp with 4 and 5 shifts, and
2383 if (!OptM1 && Srcs.hasConstOperand()) {
2384 constexpr std::size_t MaxShifts = 4;
2385 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
2386 SizeT NumOperations;
2387 int32_t Const = Srcs.getConstantValue();
2388 const bool Invert = Const < 0;
2389 const bool MultiplyByZero = Const == 0;
2390 Operand *_0 = legalize(Ctx->getConstantInt32(0), Legal_Reg | Legal_Flex);
Jim Stichnoth 2015/11/24 19:32:20 Instead of getConstantInt32(0), it may be better t
John 2015/11/24 20:26:55 Done.
2391
2392 if (MultiplyByZero) {
2393 _mov(T, _0);
2394 _mov(Dest, T);
2395 return;
2396 }
2397
2398 if (Invert) {
2399 Const = -Const;
2400 }
2401
2402 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
2403 assert(NumOperations >= 1);
2404 Variable *Src0R = Srcs.src0R(this);
2405 int32_t Start;
2406 int32_t End;
2407 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
2408 // Multiplication by a power of 2 (NumOperations == 1); or
2409 // Multiplication by a even number not a power of 2.
2410 Start = 1;
2411 End = NumOperations;
2412 assert(Shifts[0].aggregateWithAdd());
2413 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
2414 } else {
2415 // Multiplication by an odd number. Put the free barrel shifter to a
2416 // good use.
2417 Start = 0;
2418 End = NumOperations - 2;
2419 const StrengthReduction::AggregationElement &Last =
2420 Shifts[NumOperations - 1];
2421 const StrengthReduction::AggregationElement &SecondToLast =
2422 Shifts[NumOperations - 2];
2423 if (!Last.aggregateWithAdd()) {
2424 assert(SecondToLast.aggregateWithAdd());
2425 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
2426 } else if (!SecondToLast.aggregateWithAdd()) {
2427 assert(Last.aggregateWithAdd());
2428 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
2429 } else {
2430 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
2431 }
2432 }
2433
2434 // Odd numbers : S E I I
2435 // +---+---+---+---+---+---+ ... +---+---+---+---+
2436 // Shifts = | | | | | | | ... | | | | |
2437 // +---+---+---+---+---+---+ ... +---+---+---+---+
2438 // Even numbers: I S E
2439 //
2440 // S: Start; E: End; I: Init
2441 for (int32_t I = Start; I < End; ++I) {
2442 const StrengthReduction::AggregationElement &Current = Shifts[I];
2443 Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
2444 if (Current.aggregateWithAdd()) {
2445 _add(T, T, SrcF);
2446 } else {
2447 _sub(T, T, SrcF);
2448 }
2449 }
2450
2451 if (Invert) {
2452 // T = 0 - T.
2453 _rsb(T, T, _0);
2454 }
2455
2456 _mov(Dest, T);
2457 return;
2458 }
2459 }
2236 Variable *Src0R = Srcs.unswappedSrc0R(this); 2460 Variable *Src0R = Srcs.unswappedSrc0R(this);
2237 Variable *Src1R = Srcs.unswappedSrc1R(this); 2461 Variable *Src1R = Srcs.unswappedSrc1R(this);
2238 _mul(T, Src0R, Src1R); 2462 _mul(T, Src0R, Src1R);
2239 _mov(Dest, T); 2463 _mov(Dest, T);
2240 return; 2464 return;
2241 } 2465 }
2242 case InstArithmetic::Shl: { 2466 case InstArithmetic::Shl: {
2243 Variable *Src0R = Srcs.unswappedSrc0R(this); 2467 Variable *Src0R = Srcs.unswappedSrc0R(this);
2244 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); 2468 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
2245 _lsl(T, Src0R, Src1R); 2469 _lsl(T, Src0R, Src1R);
(...skipping 3257 matching lines...) Expand 10 before | Expand all | Expand 10 after
5503 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5727 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
5504 // However, for compatibility with current NaCl LLVM, don't claim that. 5728 // However, for compatibility with current NaCl LLVM, don't claim that.
5505 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5729 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
5506 } 5730 }
5507 5731
5508 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5732 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
5509 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5733 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
5510 llvm::SmallBitVector TargetARM32::ScratchRegs; 5734 llvm::SmallBitVector TargetARM32::ScratchRegs;
5511 5735
5512 } // end of namespace Ice 5736 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/arith.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698