src/IceTargetLoweringARM32.cpp - Issue 1469113003: Subzero. ARM32. Strength reduce multiplications.

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1469113003: Subzero. ARM32. Strength reduce multiplications. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Modified arith.ll to test the strength reduction output. Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringARM32.cpp

diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp

index f23609b7837cce25402e4d1382ffa9741273d02c..dfa424710a577ad71091059007a58236a04d5385 100644

--- a/src/IceTargetLoweringARM32.cpp

+++ b/src/IceTargetLoweringARM32.cpp

@@ -32,6 +32,7 @@

#include "llvm/Support/MathExtras.h"

#include <algorithm>

+#include <array>

#include <utility>

namespace Ice {

@@ -2036,6 +2037,153 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,

}

+namespace {

+// StrengthReduction is a namespace with the strength reduction machinery. The

+// entry point is the StrengthReduction::tryToOptimize method. It returns true

+// if the optimization can be performed, and false otherwise.

+//

+// If the optimization can be performed, tryToOptimize sets its NumOperations

+// parameter to the number of shifts that are needed to perform the

+// multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>

+// tuples that describe how to materialize the multiplication.

+//

+// The algorithm finds contiguous 1s in the Multiplication source, and uses one

+// or two shifts to materialize it. A sequence of 1s, i.e.

Jim Stichnoth 2015/11/24 19:32:20 e.g. instead of i.e., right?

John 2015/11/24 20:26:55 Done.

+//

+// M N

+// ...00000000000011111...111110000000...

+//

+// is materializable with (1 << (M + 1)) - (1 << N):

+//

+// ...00000000000100000...000000000000... [1 << (M + 1)]

+// ...00000000000000000...000010000000... (-) [1 << N]

+// --------------------------------------

+// ...00000000000011111...111110000000...

+//

+// And a single bit set, i.e., is just a left shift.

+namespace StrengthReduction {

+enum AggregationOperation {

+ AO_Invalid,

+ AO_Add,

+ AO_Sub,

+};

+// AggregateElement is a glorified <ShAmt, AddOrSub> tuple.

+class AggregationElement {

+ AggregationElement(const AggregationElement &) = delete;

+public:

+ AggregationElement() = default;

+ AggregationElement &operator=(const AggregationElement &) = default;

+ AggregationElement(AggregationOperation Op, uint32_t ShAmt)

+ : Op(Op), ShAmt(ShAmt) {}

+ Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {

+ assert(OpR->mustHaveReg());

+ if (ShAmt == 0) {

+ return OpR;

+ }

+ return OperandARM32FlexReg::create(

+ Func, IceType_i32, OpR, OperandARM32::LSL,

+ OperandARM32ShAmtImm::create(

+ Func, llvm::cast<ConstantInteger32>(

+ Func->getContext()->getConstantInt32(ShAmt))));

+ }

+ bool aggregateWithAdd() const {

+ switch (Op) {

+ case AO_Invalid:

+ llvm::report_fatal_error("Invalid Strength Reduction Operations.");

+ case AO_Add:

+ return true;

+ case AO_Sub:

+ return false;

+ }

+ uint32_t shAmt() const { return ShAmt; }

+private:

+ AggregationOperation Op = AO_Invalid;

+ uint32_t ShAmt = 33;

Jim Stichnoth 2015/11/24 19:32:20 I assumed 33 is meant to be an invalid value when

John 2015/11/24 20:26:55 This used to matter when this element was a const.

+};

+template <std::size_t N>

+bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,

Jim Stichnoth 2015/11/24 19:32:20 I think it might be helpful to document that [Rang

John 2015/11/24 20:26:55 Done.

+ std::array<AggregationElement, N> *Operations) {

+ assert(*NumOperations < N);

+ if (RangeStart == RangeEnd) {

+ // Single bit set:

+ // Src : 0...00010...

+ // RangeStart : ^

+ // RangeEnd : ^

+ // NegSrc : 0...00001...

+ (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);

+ ++(*NumOperations);

+ return true;

+ }

+ // Sequence of 1s: (two operations required.)

+ // Src : 0...00011...110...

+ // RangeStart : ^

+ // RangeEnd : ^

+ // NegSrc : 0...00000...001...

+ if (*NumOperations + 1 >= N) {

+ return false;

+ }

+ (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);

+ ++(*NumOperations);

+ (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);

+ ++(*NumOperations);

+ return true;

+// tryToOptmize scans Src looking for sequences of 1s (including the unitary bit

+// 1 surrounded by zeroes.

+template <std::size_t N>

+bool tryToOptimize(uint32_t Src, SizeT *NumOperations,

+ std::array<AggregationElement, N> *Operations) {

+ constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;

+ uint32_t NegSrc = ~Src;

+ *NumOperations = 0;

+ while (Src != 0 && *NumOperations < N) {

+ // Each step of the algorithm:

+ // * finds L, the last bit set in Src;

+ // * clears all the upper bits in NegSrc up bit L;

Jim Stichnoth 2015/11/24 19:32:20 up to bit

John 2015/11/24 20:26:55 Done.

+ // * finds nL, the last bit set in NegSrc;

+ // * clears all the upper bits in Src up to bit nL;

+ //

+ // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence

+ // of 1s starting at L, and ending a nL + 1, was found.

Jim Stichnoth 2015/11/24 19:32:20 ending at

John 2015/11/24 20:26:55 Done.

+ const uint32_t SrcLastBitSet = llvm::findLastSet(Src);

+ const uint32_t NegSrcClearMask =

+ (SrcLastBitSet == 0) ? 0

+ : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);

+ NegSrc &= NegSrcClearMask;

+ if (NegSrc == 0) {

+ if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {

+ return true;

+ }

+ return false;

+ }

+ const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);

+ assert(NegSrcLastBitSet < SrcLastBitSet);

+ const uint32_t SrcClearMask =

+ (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >>

+ (SrcSizeBits - NegSrcLastBitSet);

+ Src &= SrcClearMask;

+ if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,

+ Operations)) {

+ return false;

+ }

+ return Src == 0;

+} // end of namespace StrengthReduction

+} // end of anonymous namespace

void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {

Variable *Dest = Inst->getDest();

@@ -2205,8 +2353,6 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {

}

case InstArithmetic::Sub: {

if (Srcs.hasConstOperand()) {

- // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed

- // to be used.

if (Srcs.immediateIsFlexEncodable()) {

Variable *Src0R = Srcs.src0R(this);

Operand *Src1RF = Srcs.src1RF(this);

@@ -2233,6 +2379,84 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {

return;

}

case InstArithmetic::Mul: {

+ const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;

Jim Stichnoth 2015/11/24 19:32:20 In IceTargetLoweringX86BaseImpl.h, strength-reduce

John 2015/11/24 20:26:55 In this case, I ran ammp with 4 and 5 shifts, and

+ if (!OptM1 && Srcs.hasConstOperand()) {

+ constexpr std::size_t MaxShifts = 4;

+ std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;

+ SizeT NumOperations;

+ int32_t Const = Srcs.getConstantValue();

+ const bool Invert = Const < 0;

+ const bool MultiplyByZero = Const == 0;

+ Operand *_0 = legalize(Ctx->getConstantInt32(0), Legal_Reg | Legal_Flex);

Jim Stichnoth 2015/11/24 19:32:20 Instead of getConstantInt32(0), it may be better t

John 2015/11/24 20:26:55 Done.

+ if (MultiplyByZero) {

+ _mov(T, _0);

+ _mov(Dest, T);

+ return;

+ }

+ if (Invert) {

+ Const = -Const;

+ }

+ if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {

+ assert(NumOperations >= 1);

+ Variable *Src0R = Srcs.src0R(this);

+ int32_t Start;

+ int32_t End;

+ if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {

+ // Multiplication by a power of 2 (NumOperations == 1); or

+ // Multiplication by a even number not a power of 2.

+ Start = 1;

+ End = NumOperations;

+ assert(Shifts[0].aggregateWithAdd());

+ _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));

+ } else {

+ // Multiplication by an odd number. Put the free barrel shifter to a

+ // good use.

+ Start = 0;

+ End = NumOperations - 2;

+ const StrengthReduction::AggregationElement &Last =

+ Shifts[NumOperations - 1];

+ const StrengthReduction::AggregationElement &SecondToLast =

+ Shifts[NumOperations - 2];

+ if (!Last.aggregateWithAdd()) {

+ assert(SecondToLast.aggregateWithAdd());

+ _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));

+ } else if (!SecondToLast.aggregateWithAdd()) {

+ assert(Last.aggregateWithAdd());

+ _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));

+ } else {

+ _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));

+ }

+ // Odd numbers : S E I I

+ // +---+---+---+---+---+---+ ... +---+---+---+---+

+ // Shifts = | | | | | | | ... | | | | |

+ // +---+---+---+---+---+---+ ... +---+---+---+---+

+ // Even numbers: I S E

+ //

+ // S: Start; E: End; I: Init

+ for (int32_t I = Start; I < End; ++I) {

+ const StrengthReduction::AggregationElement &Current = Shifts[I];

+ Operand *SrcF = Current.createShiftedOperand(Func, Src0R);

+ if (Current.aggregateWithAdd()) {

+ _add(T, T, SrcF);

+ } else {

+ _sub(T, T, SrcF);

+ }

+ if (Invert) {

+ // T = 0 - T.

+ _rsb(T, T, _0);

+ }

+ _mov(Dest, T);

+ return;

+ }

Variable *Src0R = Srcs.unswappedSrc0R(this);

Variable *Src1R = Srcs.unswappedSrc1R(this);

_mul(T, Src0R, Src1R);

« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/arith.ll » ('j') | no next file with comments »