Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1469113003: Subzero. ARM32. Strength reduce multiplications. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds multiply-by-imm xtests Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index f23609b7837cce25402e4d1382ffa9741273d02c..dfa424710a577ad71091059007a58236a04d5385 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/MathExtras.h"
#include <algorithm>
+#include <array>
#include <utility>
namespace Ice {
@@ -2036,6 +2037,153 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
}
}
+namespace {
+// StrengthReduction is a namespace with the strength reduction machinery. The
+// entry point is the StrengthReduction::tryToOptimize method. It returns true
+// if the optimization can be performed, and false otherwise.
+//
+// If the optimization can be performed, tryToOptimize sets its NumOperations
+// parameter to the number of shifts that are needed to perform the
+// multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
+// tuples that describe how to materialize the multiplication.
+//
+// The algorithm finds contiguous 1s in the Multiplication source, and uses one
+// or two shifts to materialize it. A sequence of 1s, i.e.
+//
+// M N
+// ...00000000000011111...111110000000...
+//
+// is materializable with (1 << (M + 1)) - (1 << N):
+//
+// ...00000000000100000...000000000000... [1 << (M + 1)]
+// ...00000000000000000...000010000000... (-) [1 << N]
+// --------------------------------------
+// ...00000000000011111...111110000000...
+//
+// And a single bit set, i.e., is just a left shift.
+namespace StrengthReduction {
+enum AggregationOperation {
+ AO_Invalid,
+ AO_Add,
+ AO_Sub,
+};
+
+// AggregateElement is a glorified <ShAmt, AddOrSub> tuple.
+class AggregationElement {
+ AggregationElement(const AggregationElement &) = delete;
+
+public:
+ AggregationElement() = default;
+ AggregationElement &operator=(const AggregationElement &) = default;
+ AggregationElement(AggregationOperation Op, uint32_t ShAmt)
+ : Op(Op), ShAmt(ShAmt) {}
+
+ Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
+ assert(OpR->mustHaveReg());
+ if (ShAmt == 0) {
+ return OpR;
+ }
+ return OperandARM32FlexReg::create(
+ Func, IceType_i32, OpR, OperandARM32::LSL,
+ OperandARM32ShAmtImm::create(
+ Func, llvm::cast<ConstantInteger32>(
+ Func->getContext()->getConstantInt32(ShAmt))));
+ }
+
+ bool aggregateWithAdd() const {
+ switch (Op) {
+ case AO_Invalid:
+ llvm::report_fatal_error("Invalid Strength Reduction Operations.");
+ case AO_Add:
+ return true;
+ case AO_Sub:
+ return false;
+ }
+ }
+
+ uint32_t shAmt() const { return ShAmt; }
+
+private:
+ AggregationOperation Op = AO_Invalid;
+ uint32_t ShAmt = 33;
+};
+
+template <std::size_t N>
+bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
+ std::array<AggregationElement, N> *Operations) {
+ assert(*NumOperations < N);
+ if (RangeStart == RangeEnd) {
+ // Single bit set:
+ // Src : 0...00010...
+ // RangeStart : ^
+ // RangeEnd : ^
+ // NegSrc : 0...00001...
+ (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
+ ++(*NumOperations);
+ return true;
+ }
+
+ // Sequence of 1s: (two operations required.)
+ // Src : 0...00011...110...
+ // RangeStart : ^
+ // RangeEnd : ^
+ // NegSrc : 0...00000...001...
+ if (*NumOperations + 1 >= N) {
+ return false;
+ }
+ (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
+ ++(*NumOperations);
+ (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
+ ++(*NumOperations);
+ return true;
+}
+
+// tryToOptmize scans Src looking for sequences of 1s (including the unitary bit
+// 1 surrounded by zeroes.
+template <std::size_t N>
+bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
+ std::array<AggregationElement, N> *Operations) {
+ constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
+ uint32_t NegSrc = ~Src;
+
+ *NumOperations = 0;
+ while (Src != 0 && *NumOperations < N) {
+ // Each step of the algorithm:
+ // * finds L, the last bit set in Src;
+ // * clears all the upper bits in NegSrc up bit L;
+ // * finds nL, the last bit set in NegSrc;
+ // * clears all the upper bits in Src up to bit nL;
+ //
+ // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
+ // of 1s starting at L, and ending a nL + 1, was found.
+ const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
+ const uint32_t NegSrcClearMask =
+ (SrcLastBitSet == 0) ? 0
+ : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
+ NegSrc &= NegSrcClearMask;
+ if (NegSrc == 0) {
+ if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
+ return true;
+ }
+ return false;
+ }
+ const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
+ assert(NegSrcLastBitSet < SrcLastBitSet);
+ const uint32_t SrcClearMask =
+ (NegSrcLastBitSet == 0) ? 0 : (0xFFFFFFFFu) >>
+ (SrcSizeBits - NegSrcLastBitSet);
+ Src &= SrcClearMask;
+ if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
+ Operations)) {
+ return false;
+ }
+ }
+
+ return Src == 0;
+}
+} // end of namespace StrengthReduction
+} // end of anonymous namespace
+
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
@@ -2205,8 +2353,6 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
}
case InstArithmetic::Sub: {
if (Srcs.hasConstOperand()) {
- // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
- // to be used.
if (Srcs.immediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
@@ -2233,6 +2379,84 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return;
}
case InstArithmetic::Mul: {
+ const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
+ if (!OptM1 && Srcs.hasConstOperand()) {
+ constexpr std::size_t MaxShifts = 4;
+ std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
+ SizeT NumOperations;
+ int32_t Const = Srcs.getConstantValue();
+ const bool Invert = Const < 0;
+ const bool MultiplyByZero = Const == 0;
+ Operand *_0 = legalize(Ctx->getConstantInt32(0), Legal_Reg | Legal_Flex);
+
+ if (MultiplyByZero) {
+ _mov(T, _0);
+ _mov(Dest, T);
+ return;
+ }
+
+ if (Invert) {
+ Const = -Const;
+ }
+
+ if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
+ assert(NumOperations >= 1);
+ Variable *Src0R = Srcs.src0R(this);
+ int32_t Start;
+ int32_t End;
+ if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
+ // Multiplication by a power of 2 (NumOperations == 1); or
+ // Multiplication by a even number not a power of 2.
+ Start = 1;
+ End = NumOperations;
+ assert(Shifts[0].aggregateWithAdd());
+ _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
+ } else {
+ // Multiplication by an odd number. Put the free barrel shifter to a
+ // good use.
+ Start = 0;
+ End = NumOperations - 2;
+ const StrengthReduction::AggregationElement &Last =
+ Shifts[NumOperations - 1];
+ const StrengthReduction::AggregationElement &SecondToLast =
+ Shifts[NumOperations - 2];
+ if (!Last.aggregateWithAdd()) {
+ assert(SecondToLast.aggregateWithAdd());
+ _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
+ } else if (!SecondToLast.aggregateWithAdd()) {
+ assert(Last.aggregateWithAdd());
+ _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
+ } else {
+ _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
+ }
+ }
+
+ // Odd numbers : S E I I
+ // +---+---+---+---+---+---+ ... +---+---+---+---+
+ // Shifts = | | | | | | | ... | | | | |
+ // +---+---+---+---+---+---+ ... +---+---+---+---+
+ // Even numbers: I S E
+ //
+ // S: Start; E: End; I: Init
+ for (int32_t I = Start; I < End; ++I) {
+ const StrengthReduction::AggregationElement &Current = Shifts[I];
+ Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
+ if (Current.aggregateWithAdd()) {
+ _add(T, T, SrcF);
+ } else {
+ _sub(T, T, SrcF);
+ }
+ }
+
+ if (Invert) {
+ // T = 0 - T.
+ _rsb(T, T, _0);
+ }
+
+ _mov(Dest, T);
+ return;
+ }
+ }
Variable *Src0R = Srcs.unswappedSrc0R(this);
Variable *Src1R = Srcs.unswappedSrc1R(this);
_mul(T, Src0R, Src1R);

Powered by Google App Engine
This is Rietveld 408576698