Index: src/compiler/arm/instruction-selector-arm.cc |
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc |
index 4f421587a90f1db2f62da2a1c971da437ac0e462..1d94ed68f30bf72c8cb71e62ee465a3781836da6 100644 |
--- a/src/compiler/arm/instruction-selector-arm.cc |
+++ b/src/compiler/arm/instruction-selector-arm.cc |
@@ -2583,8 +2583,7 @@ static const ShuffleEntry<4> arch_s32x4_shuffles[] = { |
{{1, 3, 5, 7}, kArmS32x4UnzipRight}, |
{{0, 4, 2, 6}, kArmS32x4TransposeLeft}, |
{{1, 5, 3, 7}, kArmS32x4TransposeRight}, |
- {{1, 0, 3, 2}, kArmS32x2Reverse}, |
-}; |
+ {{1, 0, 3, 2}, kArmS32x2Reverse}}; |
static const ShuffleEntry<8> arch_s16x8_shuffles[] = { |
{{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft}, |
@@ -2594,8 +2593,7 @@ static const ShuffleEntry<8> arch_s16x8_shuffles[] = { |
{{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft}, |
{{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight}, |
{{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse}, |
- {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}, |
-}; |
+ {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}}; |
static const ShuffleEntry<16> arch_s8x16_shuffles[] = { |
{{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, |
@@ -2612,8 +2610,7 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = { |
kArmS8x16TransposeRight}, |
{{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse}, |
{{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, |
- {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}, |
-}; |
+ {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}}; |
// Use a non-shuffle opcode to signal no match. |
static const ArchOpcode kNoShuffle = kArmS128Not; |
@@ -2683,6 +2680,27 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node, |
return mask; |
} |
+// Packs four consecutive shuffle entries into a single 32-bit immediate. |
+// Each of shuffle[0..3] is ANDed with |mask|; shuffle[0] lands in the least |
+// significant byte of the result and shuffle[3] in the most significant byte. |
+int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) { |
+  uint32_t packed = 0; |
+  for (int lane = 0; lane < 4; ++lane) { |
+    const uint32_t index = static_cast<uint32_t>(shuffle[lane] & mask); |
+    packed |= index << (8 * lane); |
+  } |
+  return static_cast<int32_t>(packed); |
+} |
+ |
+// Chooses the operands for the two shuffle sources so that together they |
+// form a valid lookup table. The results are written to |src0| and |src1|. |
+void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1, |
+                         InstructionOperand* src0, InstructionOperand* src1) { |
+  if (input0 != input1) { |
+    // Binary shuffle: the table registers must be consecutive, so the two |
+    // inputs are fixed to q0 and q1. |
+    *src0 = g->UseFixed(input0, q0); |
+    *src1 = g->UseFixed(input1, q1); |
+    return; |
+  } |
+  // Unary shuffle: any q-register can be the table, and both sources share |
+  // the same operand. |
+  *src1 = g->UseRegister(input0); |
+  *src0 = *src1; |
+} |
+ |
} // namespace |
void InstructionSelector::VisitS32x4Shuffle(Node* node) { |
@@ -2702,7 +2720,9 @@ void InstructionSelector::VisitS32x4Shuffle(Node* node) { |
g.UseImmediate(lanes * 4)); |
return; |
} |
- // TODO(bbudge) vtbl to handle all other shuffles. |
+ Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), |
+ g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
+ g.UseImmediate(Pack4Lanes(shuffle, mask))); |
} |
void InstructionSelector::VisitS16x8Shuffle(Node* node) { |
@@ -2715,13 +2735,20 @@ void InstructionSelector::VisitS16x8Shuffle(Node* node) { |
return; |
} |
ArmOperandGenerator g(this); |
+ Node* input0 = node->InputAt(0); |
+ Node* input1 = node->InputAt(1); |
uint8_t lanes = TryMatchConcat<8>(shuffle, mask); |
if (lanes != 0) { |
- Emit(kArmS8x16Concat, g.DefineAsRegister(node), |
- g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
- g.UseImmediate(lanes * 2)); |
+ Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), |
+ g.UseRegister(input1), g.UseImmediate(lanes * 2)); |
+ return; |
} |
- // TODO(bbudge) vtbl to handle all other shuffles. |
+ // Code generator uses vtbl, arrange sources to form a valid lookup table. |
+ InstructionOperand src0, src1; |
+ ArrangeShuffleTable(&g, input0, input1, &src0, &src1); |
+ Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1, |
+ g.UseImmediate(Pack4Lanes(shuffle, mask)), |
+ g.UseImmediate(Pack4Lanes(shuffle + 4, mask))); |
} |
void InstructionSelector::VisitS8x16Shuffle(Node* node) { |
@@ -2734,13 +2761,22 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) { |
return; |
} |
ArmOperandGenerator g(this); |
+ Node* input0 = node->InputAt(0); |
+ Node* input1 = node->InputAt(1); |
uint8_t lanes = TryMatchConcat<16>(shuffle, mask); |
if (lanes != 0) { |
- Emit(kArmS8x16Concat, g.DefineAsRegister(node), |
- g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
- g.UseImmediate(lanes)); |
+ Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), |
+ g.UseRegister(input1), g.UseImmediate(lanes)); |
+ return; |
} |
- // TODO(bbudge) vtbl to handle all other shuffles. |
+ // Code generator uses vtbl, arrange sources to form a valid lookup table. |
+ InstructionOperand src0, src1; |
+ ArrangeShuffleTable(&g, input0, input1, &src0, &src1); |
+ Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1, |
+ g.UseImmediate(Pack4Lanes(shuffle, mask)), |
+ g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), |
+ g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), |
+ g.UseImmediate(Pack4Lanes(shuffle + 12, mask))); |
} |
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { |