Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(65)

Unified Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2923103003: [WASM] Simplify SIMD shuffle opcodes. (Closed)
Patch Set: Mircea's review comments. Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | src/compiler/instruction-selector.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/compiler/arm/instruction-selector-arm.cc
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
index 83a977867b6d6659206e0f365874031e46b6c171..6a7117ff00ec17c50d78f81561315201a8191a24 100644
--- a/src/compiler/arm/instruction-selector-arm.cc
+++ b/src/compiler/arm/instruction-selector-arm.cc
@@ -2535,32 +2535,71 @@ void InstructionSelector::VisitS128Select(Node* node) {
}
namespace {
-template <int LANES>
+
+// Tries to match 8x16 byte shuffle to equivalent 32x4 word shuffle.
+// Reads shuffle[0..15] as four groups of four byte indices. Returns true and
+// fills shuffle32x4[0..3] with one word index per group when every group
+// selects a whole, aligned 32-bit word; returns false otherwise. On failure,
+// entries of shuffle32x4 for groups that matched before the failing one have
+// already been written.
+bool TryMatch32x4Shuffle(const uint8_t* shuffle, uint8_t* shuffle32x4) {
+ static const int kLanes = 4;
+ static const int kLaneSize = 4;
+ for (int i = 0; i < kLanes; ++i) {
+ // The first byte index of the group must be a multiple of the lane size.
+ if (shuffle[i * kLaneSize] % kLaneSize != 0) return false;
+ // Each following byte index must be exactly one more than the previous.
+ for (int j = 1; j < kLaneSize; ++j) {
+ if (shuffle[i * kLaneSize + j] - shuffle[i * kLaneSize + j - 1] != 1)
+ return false;
+ }
+ // Convert the group's first byte index into a word index.
+ shuffle32x4[i] = shuffle[i * kLaneSize] / kLaneSize;
+ }
+ return true;
+}
+
+// Tries to match a byte shuffle to a concatenate (vext) operation.
+//
+// A match means the indices, compared under `mask`, run consecutively from
+// shuffle[0] up to the end of the first input and then continue from index
+// kSimd128Size (the first byte of the second input).
+//
+// shuffle: byte indices; entries 0..kSimd128Size-1 are read.
+// mask:    bits of each index that take part in the comparisons.
+// offset:  receives shuffle[0] on success; left untouched on failure.
+// Returns true on a match, false otherwise.
+bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask, uint8_t* offset) {
+  const uint8_t start = shuffle[0];
+  // A first index of kSimd128Size or more does not start in the first input,
+  // so it is not treated as a concat; the caller falls through to its next
+  // strategy. Rejecting it here also keeps `kSimd128Size - start` positive,
+  // so the wrap loop below never indexes before the beginning of `shuffle`.
+  if (start >= kSimd128Size) return false;
+  const int first_input_count = kSimd128Size - start;
+  // Indices taken from the first input must be consecutive.
+  for (int i = 1; i < first_input_count; ++i) {
+    if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false;
+  }
+  // The remaining indices must continue from the start of the second input.
+  uint8_t wrap = kSimd128Size;
+  for (int i = first_input_count; i < kSimd128Size; ++i, ++wrap) {
+    if ((shuffle[i] & mask) != (wrap & mask)) return false;
+  }
+  *offset = start;
+  return true;
+}
+
struct ShuffleEntry {
- uint8_t shuffle[LANES];
+ uint8_t shuffle[kSimd128Size];
ArchOpcode opcode;
};
-static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
- {{0, 4, 1, 5}, kArmS32x4ZipLeft},
- {{2, 6, 3, 7}, kArmS32x4ZipRight},
- {{0, 2, 4, 6}, kArmS32x4UnzipLeft},
- {{1, 3, 5, 7}, kArmS32x4UnzipRight},
- {{0, 4, 2, 6}, kArmS32x4TransposeLeft},
- {{1, 5, 3, 7}, kArmS32x4TransposeRight},
- {{1, 0, 3, 2}, kArmS32x2Reverse}};
-
-static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
- {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
- {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight},
- {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft},
- {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight},
- {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
- {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
- {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
- {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}};
-
-static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
+static const ShuffleEntry arch_shuffles[] = {
+ {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23},
+ kArmS32x4ZipLeft},
+ {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31},
+ kArmS32x4ZipRight},
+ {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27},
+ kArmS32x4UnzipLeft},
+ {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31},
+ kArmS32x4UnzipRight},
+ {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27},
+ kArmS32x4TransposeLeft},
+ {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31},
+ kArmS32x4TransposeRight},
+ {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, kArmS32x2Reverse},
+
+ {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23},
+ kArmS16x8ZipLeft},
+ {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31},
+ kArmS16x8ZipRight},
+ {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29},
+ kArmS16x8UnzipLeft},
+ {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31},
+ kArmS16x8UnzipRight},
+ {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29},
+ kArmS16x8TransposeLeft},
+ {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31},
+ kArmS16x8TransposeRight},
+ {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kArmS16x4Reverse},
+ {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kArmS16x2Reverse},
+
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
kArmS8x16ZipLeft},
{{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
@@ -2577,45 +2616,28 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
{{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
-// Use a non-shuffle opcode to signal no match.
-static const ArchOpcode kNoShuffle = kArmS128Not;
-
-template <int LANES>
-ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle,
- const ShuffleEntry<LANES>* table,
- size_t num_entries, uint8_t mask) {
- for (size_t i = 0; i < num_entries; i++) {
- const ShuffleEntry<LANES>& entry = table[i];
+bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table,
+ size_t num_entries, uint8_t mask, ArchOpcode* opcode) {
+ for (size_t i = 0; i < num_entries; ++i) {
+ const ShuffleEntry& entry = table[i];
int j = 0;
- for (; j < LANES; j++) {
+ for (; j < kSimd128Size; ++j) {
if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) {
break;
}
}
- if (j == LANES) return entry.opcode;
- }
- return kNoShuffle;
-}
-
-// Returns the bias if shuffle is a concatenation, 0 otherwise.
-template <int LANES>
-uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) {
- uint8_t start = shuffle[0];
- int i = 1;
- for (; i < LANES - start; i++) {
- if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0;
- }
- uint8_t wrap = LANES;
- for (; i < LANES; i++, wrap++) {
- if ((shuffle[i] & mask) != (wrap & mask)) return 0;
+ if (j == kSimd128Size) {
+ *opcode = entry.opcode;
+ return true;
+ }
}
- return start;
+ return false;
}
// Canonicalize shuffles to make pattern matching simpler. Returns a mask that
// will ignore the high bit of indices in some cases.
-uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
- int num_lanes) {
+uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node) {
+ static const int kUnaryShuffleMask = kSimd128Size - 1;
const uint8_t* shuffle = OpParameter<uint8_t*>(node);
uint8_t mask = 0xff;
// If shuffle is unary, set 'mask' to ignore the high bit of the indices.
@@ -2623,12 +2645,12 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
if (selector->GetVirtualRegister(node->InputAt(0)) ==
selector->GetVirtualRegister(node->InputAt(1))) {
// unary, src0 == src1.
- mask = num_lanes - 1;
+ mask = kUnaryShuffleMask;
} else {
bool src0_is_used = false;
bool src1_is_used = false;
- for (int i = 0; i < num_lanes; i++) {
- if (shuffle[i] < num_lanes) {
+ for (int i = 0; i < kSimd128Size; i++) {
+ if (shuffle[i] < kSimd128Size) {
src0_is_used = true;
} else {
src1_is_used = true;
@@ -2636,10 +2658,10 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
}
if (src0_is_used && !src1_is_used) {
node->ReplaceInput(1, node->InputAt(0));
- mask = num_lanes - 1;
+ mask = kUnaryShuffleMask;
} else if (src1_is_used && !src0_is_used) {
node->ReplaceInput(0, node->InputAt(1));
- mask = num_lanes - 1;
+ mask = kUnaryShuffleMask;
}
}
return mask;
@@ -2647,7 +2669,7 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {
int32_t result = 0;
- for (int i = 3; i >= 0; i--) {
+ for (int i = 3; i >= 0; --i) {
result <<= 8;
result |= shuffle[i] & mask;
}
@@ -2668,70 +2690,29 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
} // namespace
-void InstructionSelector::VisitS32x4Shuffle(Node* node) {
+void InstructionSelector::VisitS8x16Shuffle(Node* node) {
const uint8_t* shuffle = OpParameter<uint8_t*>(node);
- uint8_t mask = CanonicalizeShuffle(this, node, 4);
- ArchOpcode opcode = TryMatchArchShuffle<4>(
- shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask);
- if (opcode != kNoShuffle) {
- VisitRRRShuffle(this, opcode, node);
- return;
- }
+ uint8_t mask = CanonicalizeShuffle(this, node);
+ uint8_t shuffle32x4[4];
ArmOperandGenerator g(this);
- uint8_t lanes = TryMatchConcat<4>(shuffle, mask);
- if (lanes != 0) {
- Emit(kArmS8x16Concat, g.DefineAsRegister(node),
+ if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) {
+ Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
- g.UseImmediate(lanes * 4));
+ g.UseImmediate(Pack4Lanes(shuffle32x4, mask)));
return;
}
- Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
- g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
- g.UseImmediate(Pack4Lanes(shuffle, mask)));
-}
-
-void InstructionSelector::VisitS16x8Shuffle(Node* node) {
- const uint8_t* shuffle = OpParameter<uint8_t*>(node);
- uint8_t mask = CanonicalizeShuffle(this, node, 8);
- ArchOpcode opcode = TryMatchArchShuffle<8>(
- shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask);
- if (opcode != kNoShuffle) {
- VisitRRRShuffle(this, opcode, node);
- return;
- }
- ArmOperandGenerator g(this);
- Node* input0 = node->InputAt(0);
- Node* input1 = node->InputAt(1);
- uint8_t lanes = TryMatchConcat<8>(shuffle, mask);
- if (lanes != 0) {
- Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
- g.UseRegister(input1), g.UseImmediate(lanes * 2));
- return;
- }
- // Code generator uses vtbl, arrange sources to form a valid lookup table.
- InstructionOperand src0, src1;
- ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
- Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1,
- g.UseImmediate(Pack4Lanes(shuffle, mask)),
- g.UseImmediate(Pack4Lanes(shuffle + 4, mask)));
-}
-
-void InstructionSelector::VisitS8x16Shuffle(Node* node) {
- const uint8_t* shuffle = OpParameter<uint8_t*>(node);
- uint8_t mask = CanonicalizeShuffle(this, node, 16);
- ArchOpcode opcode = TryMatchArchShuffle<16>(
- shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask);
- if (opcode != kNoShuffle) {
+ ArchOpcode opcode;
+ if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles),
+ mask, &opcode)) {
VisitRRRShuffle(this, opcode, node);
return;
}
- ArmOperandGenerator g(this);
Node* input0 = node->InputAt(0);
Node* input1 = node->InputAt(1);
- uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
- if (lanes != 0) {
+ uint8_t offset;
+ if (TryMatchConcat(shuffle, mask, &offset)) {
Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
- g.UseRegister(input1), g.UseImmediate(lanes));
+ g.UseRegister(input1), g.UseImmediate(offset));
return;
}
// Code generator uses vtbl, arrange sources to form a valid lookup table.
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | src/compiler/instruction-selector.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698