Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Unified Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2856363003: [ARM] Implement irregular vector shuffles for SIMD. (Closed)
Patch Set: Review comments. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | test/cctest/wasm/test-run-wasm-simd.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/compiler/arm/instruction-selector-arm.cc
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
index 4f421587a90f1db2f62da2a1c971da437ac0e462..1d94ed68f30bf72c8cb71e62ee465a3781836da6 100644
--- a/src/compiler/arm/instruction-selector-arm.cc
+++ b/src/compiler/arm/instruction-selector-arm.cc
@@ -2583,8 +2583,7 @@ static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
{{1, 3, 5, 7}, kArmS32x4UnzipRight},
{{0, 4, 2, 6}, kArmS32x4TransposeLeft},
{{1, 5, 3, 7}, kArmS32x4TransposeRight},
- {{1, 0, 3, 2}, kArmS32x2Reverse},
-};
+ {{1, 0, 3, 2}, kArmS32x2Reverse}};
static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
{{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
@@ -2594,8 +2593,7 @@ static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
{{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
{{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
{{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
- {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse},
-};
+ {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}};
static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
@@ -2612,8 +2610,7 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
kArmS8x16TransposeRight},
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse},
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
- {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse},
-};
+ {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
// Use a non-shuffle opcode to signal no match.
static const ArchOpcode kNoShuffle = kArmS128Not;
@@ -2683,6 +2680,27 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
return mask;
}
+int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {
+ int32_t result = 0;
+ for (int i = 3; i >= 0; i--) {
+ result <<= 8;
+ result |= shuffle[i] & mask;
+ }
+ return result;
+}
+
+void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
+ InstructionOperand* src0, InstructionOperand* src1) {
+ if (input0 == input1) {
+ // Unary, any q-register can be the table.
+ *src0 = *src1 = g->UseRegister(input0);
+ } else {
+ // Binary, table registers must be consecutive.
+ *src0 = g->UseFixed(input0, q0);
+ *src1 = g->UseFixed(input1, q1);
+ }
+}
+
} // namespace
void InstructionSelector::VisitS32x4Shuffle(Node* node) {
@@ -2702,7 +2720,9 @@ void InstructionSelector::VisitS32x4Shuffle(Node* node) {
g.UseImmediate(lanes * 4));
return;
}
- // TODO(bbudge) vtbl to handle all other shuffles.
+ Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
+ g.UseImmediate(Pack4Lanes(shuffle, mask)));
}
void InstructionSelector::VisitS16x8Shuffle(Node* node) {
@@ -2715,13 +2735,20 @@ void InstructionSelector::VisitS16x8Shuffle(Node* node) {
return;
}
ArmOperandGenerator g(this);
+ Node* input0 = node->InputAt(0);
+ Node* input1 = node->InputAt(1);
uint8_t lanes = TryMatchConcat<8>(shuffle, mask);
if (lanes != 0) {
- Emit(kArmS8x16Concat, g.DefineAsRegister(node),
- g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
- g.UseImmediate(lanes * 2));
+ Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
+ g.UseRegister(input1), g.UseImmediate(lanes * 2));
+ return;
}
- // TODO(bbudge) vtbl to handle all other shuffles.
+ // Code generator uses vtbl, arrange sources to form a valid lookup table.
+ InstructionOperand src0, src1;
+ ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
+ Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1,
+ g.UseImmediate(Pack4Lanes(shuffle, mask)),
+ g.UseImmediate(Pack4Lanes(shuffle + 4, mask)));
}
void InstructionSelector::VisitS8x16Shuffle(Node* node) {
@@ -2734,13 +2761,22 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
return;
}
ArmOperandGenerator g(this);
+ Node* input0 = node->InputAt(0);
+ Node* input1 = node->InputAt(1);
uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
if (lanes != 0) {
- Emit(kArmS8x16Concat, g.DefineAsRegister(node),
- g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
- g.UseImmediate(lanes));
+ Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
+ g.UseRegister(input1), g.UseImmediate(lanes));
+ return;
}
- // TODO(bbudge) vtbl to handle all other shuffles.
+ // Code generator uses vtbl, arrange sources to form a valid lookup table.
+ InstructionOperand src0, src1;
+ ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
+ Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1,
+ g.UseImmediate(Pack4Lanes(shuffle, mask)),
+ g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
+ g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
+ g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | test/cctest/wasm/test-run-wasm-simd.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698