Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index f23609b7837cce25402e4d1382ffa9741273d02c..dfa424710a577ad71091059007a58236a04d5385 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -32,6 +32,7 @@ |
| #include "llvm/Support/MathExtras.h" |
| #include <algorithm> |
| +#include <array> |
| #include <utility> |
| namespace Ice { |
| @@ -2036,6 +2037,153 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, |
| } |
| } |
| +namespace { |
| +// StrengthReduction is a namespace with the strength reduction machinery. The |
| +// entry point is the StrengthReduction::tryToOptimize method. It returns true |
| +// if the optimization can be performed, and false otherwise. |
| +// |
| +// If the optimization can be performed, tryToOptimize sets its NumOperations |
| +// parameter to the number of shifts that are needed to perform the |
| +// multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub> |
| +// tuples that describe how to materialize the multiplication. |
| +// |
| +// The algorithm finds contiguous 1s in the Multiplication source, and uses one |
| +// or two shifts to materialize it. A sequence of 1s, i.e. |
|
Jim Stichnoth
2015/11/24 19:32:20
e.g. instead of i.e., right?
John
2015/11/24 20:26:55
Done.
|
| +// |
| +// M N |
| +// ...00000000000011111...111110000000... |
| +// |
| +// is materializable with (1 << (M + 1)) - (1 << N): |
| +// |
| +// ...00000000000100000...000000000000... [1 << (M + 1)] |
| +// ...00000000000000000...000010000000... (-) [1 << N] |
| +// -------------------------------------- |
| +// ...00000000000011111...111110000000... |
| +// |
| +// And a single bit set, i.e., is just a left shift. |
| +namespace StrengthReduction { |
| +enum AggregationOperation { |
| + AO_Invalid, |
| + AO_Add, |
| + AO_Sub, |
| +}; |
| + |
| +// AggregateElement is a glorified <ShAmt, AddOrSub> tuple. |
| +class AggregationElement { |
| + AggregationElement(const AggregationElement &) = delete; |
| + |
| +public: |
| + AggregationElement() = default; |
| + AggregationElement &operator=(const AggregationElement &) = default; |
| + AggregationElement(AggregationOperation Op, uint32_t ShAmt) |
| + : Op(Op), ShAmt(ShAmt) {} |
| + |
| + Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const { |
| + assert(OpR->mustHaveReg()); |
| + if (ShAmt == 0) { |
| + return OpR; |
| + } |
| + return OperandARM32FlexReg::create( |
| + Func, IceType_i32, OpR, OperandARM32::LSL, |
| + OperandARM32ShAmtImm::create( |
| + Func, llvm::cast<ConstantInteger32>( |
| + Func->getContext()->getConstantInt32(ShAmt)))); |
| + } |
| + |
| + bool aggregateWithAdd() const { |
| + switch (Op) { |
| + case AO_Invalid: |
| + llvm::report_fatal_error("Invalid Strength Reduction Operations."); |
| + case AO_Add: |
| + return true; |
| + case AO_Sub: |
| + return false; |
| + } |
| + } |
| + |
| + uint32_t shAmt() const { return ShAmt; } |
| + |
| +private: |
| + AggregationOperation Op = AO_Invalid; |
| + uint32_t ShAmt = 33; |
|
Jim Stichnoth
2015/11/24 19:32:20
I assumed 33 is meant to be an invalid value when
John
2015/11/24 20:26:55
This used to matter when this element was a const.
|
| +}; |
| + |
| +template <std::size_t N> |
| +bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations, |
|
Jim Stichnoth
2015/11/24 19:32:20
I think it might be helpful to document that [Rang
John
2015/11/24 20:26:55
Done.
|
| + std::array<AggregationElement, N> *Operations) { |
| + assert(*NumOperations < N); |
| + if (RangeStart == RangeEnd) { |
| + // Single bit set: |
| + // Src : 0...00010... |
| + // RangeStart : ^ |
| + // RangeEnd : ^ |
| + // NegSrc : 0...00001... |
| + (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart); |
| + ++(*NumOperations); |
| + return true; |
| + } |
| + |
| + // Sequence of 1s: (two operations required.) |
| + // Src : 0...00011...110... |
| + // RangeStart : ^ |
| + // RangeEnd : ^ |
| + // NegSrc : 0...00000...001... |
| + if (*NumOperations + 1 >= N) { |
| + return false; |
| + } |
| + (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1); |
| + ++(*NumOperations); |
| + (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd); |
| + ++(*NumOperations); |
| + return true; |
| +} |
| + |
| +// tryToOptmize scans Src looking for sequences of 1s (including the unitary bit |
| +// 1 surrounded by zeroes. |
| +template <std::size_t N> |
| +bool tryToOptimize(uint32_t Src, SizeT *NumOperations, |
| + std::array<AggregationElement, N> *Operations) { |
| + constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT; |
| + uint32_t NegSrc = ~Src; |
| + |
| + *NumOperations = 0; |
| + while (Src != 0 && *NumOperations < N) { |
| + // Each step of the algorithm: |
| + // * finds L, the last bit set in Src; |
| + // * clears all the upper bits in NegSrc up bit L; |
|
Jim Stichnoth
2015/11/24 19:32:20
up to bit
John
2015/11/24 20:26:55
Done.
|
| + // * finds nL, the last bit set in NegSrc; |
| + // * clears all the upper bits in Src up to bit nL; |
| + // |
| + // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence |
| + // of 1s starting at L, and ending a nL + 1, was found. |
|
Jim Stichnoth
2015/11/24 19:32:20
ending at
John
2015/11/24 20:26:55
Done.
|
| + const uint32_t SrcLastBitSet = llvm::findLastSet(Src); |
| + const uint32_t NegSrcClearMask = |
| + (SrcLastBitSet == 0) ? 0 |
| + : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet); |
| + NegSrc &= NegSrcClearMask; |
| + if (NegSrc == 0) { |
| + if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) { |
| + return true; |
| + } |
| + return false; |
| + } |
| + const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc); |
| + assert(NegSrcLastBitSet < SrcLastBitSet); |
| + const uint32_t SrcClearMask = |
| + (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >> |
| + (SrcSizeBits - NegSrcLastBitSet); |
| + Src &= SrcClearMask; |
| + if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations, |
| + Operations)) { |
| + return false; |
| + } |
| + } |
| + |
| + return Src == 0; |
| +} |
| +} // end of namespace StrengthReduction |
| +} // end of anonymous namespace |
| + |
| void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Variable *Dest = Inst->getDest(); |
| @@ -2205,8 +2353,6 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| } |
| case InstArithmetic::Sub: { |
| if (Srcs.hasConstOperand()) { |
| - // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed |
| - // to be used. |
| if (Srcs.immediateIsFlexEncodable()) { |
| Variable *Src0R = Srcs.src0R(this); |
| Operand *Src1RF = Srcs.src1RF(this); |
| @@ -2233,6 +2379,84 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| return; |
| } |
| case InstArithmetic::Mul: { |
| + const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; |
|
Jim Stichnoth
2015/11/24 19:32:20
In IceTargetLoweringX86BaseImpl.h, strength-reduce
John
2015/11/24 20:26:55
In this case, I ran ammp with 4 and 5 shifts, and
|
| + if (!OptM1 && Srcs.hasConstOperand()) { |
| + constexpr std::size_t MaxShifts = 4; |
| + std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts; |
| + SizeT NumOperations; |
| + int32_t Const = Srcs.getConstantValue(); |
| + const bool Invert = Const < 0; |
| + const bool MultiplyByZero = Const == 0; |
| + Operand *_0 = legalize(Ctx->getConstantInt32(0), Legal_Reg | Legal_Flex); |
|
Jim Stichnoth
2015/11/24 19:32:20
Instead of getConstantInt32(0), it may be better t
John
2015/11/24 20:26:55
Done.
|
| + |
| + if (MultiplyByZero) { |
| + _mov(T, _0); |
| + _mov(Dest, T); |
| + return; |
| + } |
| + |
| + if (Invert) { |
| + Const = -Const; |
| + } |
| + |
| + if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) { |
| + assert(NumOperations >= 1); |
| + Variable *Src0R = Srcs.src0R(this); |
| + int32_t Start; |
| + int32_t End; |
| + if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) { |
| + // Multiplication by a power of 2 (NumOperations == 1); or |
| + // Multiplication by a even number not a power of 2. |
| + Start = 1; |
| + End = NumOperations; |
| + assert(Shifts[0].aggregateWithAdd()); |
| + _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt())); |
| + } else { |
| + // Multiplication by an odd number. Put the free barrel shifter to a |
| + // good use. |
| + Start = 0; |
| + End = NumOperations - 2; |
| + const StrengthReduction::AggregationElement &Last = |
| + Shifts[NumOperations - 1]; |
| + const StrengthReduction::AggregationElement &SecondToLast = |
| + Shifts[NumOperations - 2]; |
| + if (!Last.aggregateWithAdd()) { |
| + assert(SecondToLast.aggregateWithAdd()); |
| + _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); |
| + } else if (!SecondToLast.aggregateWithAdd()) { |
| + assert(Last.aggregateWithAdd()); |
| + _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); |
| + } else { |
| + _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R)); |
| + } |
| + } |
| + |
| + // Odd numbers : S E I I |
| + // +---+---+---+---+---+---+ ... +---+---+---+---+ |
| + // Shifts = | | | | | | | ... | | | | | |
| + // +---+---+---+---+---+---+ ... +---+---+---+---+ |
| + // Even numbers: I S E |
| + // |
| + // S: Start; E: End; I: Init |
| + for (int32_t I = Start; I < End; ++I) { |
| + const StrengthReduction::AggregationElement &Current = Shifts[I]; |
| + Operand *SrcF = Current.createShiftedOperand(Func, Src0R); |
| + if (Current.aggregateWithAdd()) { |
| + _add(T, T, SrcF); |
| + } else { |
| + _sub(T, T, SrcF); |
| + } |
| + } |
| + |
| + if (Invert) { |
| + // T = 0 - T. |
| + _rsb(T, T, _0); |
| + } |
| + |
| + _mov(Dest, T); |
| + return; |
| + } |
| + } |
| Variable *Src0R = Srcs.unswappedSrc0R(this); |
| Variable *Src1R = Srcs.unswappedSrc1R(this); |
| _mul(T, Src0R, Src1R); |