| Index: src/compiler/arm/instruction-selector-arm.cc
|
| diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
|
| index 4f421587a90f1db2f62da2a1c971da437ac0e462..1d94ed68f30bf72c8cb71e62ee465a3781836da6 100644
|
| --- a/src/compiler/arm/instruction-selector-arm.cc
|
| +++ b/src/compiler/arm/instruction-selector-arm.cc
|
| @@ -2583,8 +2583,7 @@ static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
|
| {{1, 3, 5, 7}, kArmS32x4UnzipRight},
|
| {{0, 4, 2, 6}, kArmS32x4TransposeLeft},
|
| {{1, 5, 3, 7}, kArmS32x4TransposeRight},
|
| - {{1, 0, 3, 2}, kArmS32x2Reverse},
|
| -};
|
| + {{1, 0, 3, 2}, kArmS32x2Reverse}};
|
|
|
| static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
|
| {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
|
| @@ -2594,8 +2593,7 @@ static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
|
| {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
|
| {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
|
| {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
|
| - {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse},
|
| -};
|
| + {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}};
|
|
|
| static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
|
| {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
|
| @@ -2612,8 +2610,7 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
|
| kArmS8x16TransposeRight},
|
| {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse},
|
| {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
|
| - {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse},
|
| -};
|
| + {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
|
|
|
| // Use a non-shuffle opcode to signal no match.
|
| static const ArchOpcode kNoShuffle = kArmS128Not;
|
| @@ -2683,6 +2680,27 @@ uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node,
|
| return mask;
|
| }
|
|
|
| +int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {  // Packs 4 lanes.
|
| +  int32_t result = 0;  // NOTE(review): presumably lanes&mask < 0x80; confirm.
|
| +  for (int i = 3; i >= 0; i--) {  // High lane first; shuffle[0] ends lowest.
|
| +    result <<= 8;  // Make room in the low byte for the next lane.
|
| +    result |= shuffle[i] & mask;  // Keep only the bits selected by mask.
|
| +  }
|
| +  return result;  // shuffle[3] is bits 31..24, shuffle[0] bits 7..0.
|
| +}
|
| +
|
| +void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
|
| +                         InstructionOperand* src0, InstructionOperand* src1) {
|
| +  if (input0 == input1) {  // Both shuffle inputs are the same node.
|
| +    // Unary: the single source is the whole vtbl table; any q-register works.
|
| +    *src0 = *src1 = g->UseRegister(input0);  // One operand, used twice.
|
| +  } else {
|
| +    // Binary: vtbl table registers must be consecutive; pin inputs to q0, q1.
|
| +    *src0 = g->UseFixed(input0, q0);
|
| +    *src1 = g->UseFixed(input1, q1);
|
| +  }
|
| +}
|
| +
|
| } // namespace
|
|
|
| void InstructionSelector::VisitS32x4Shuffle(Node* node) {
|
| @@ -2702,7 +2720,9 @@ void InstructionSelector::VisitS32x4Shuffle(Node* node) {
|
| g.UseImmediate(lanes * 4));
|
| return;
|
| }
|
| - // TODO(bbudge) vtbl to handle all other shuffles.
|
| + Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
|
| + g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
| + g.UseImmediate(Pack4Lanes(shuffle, mask)));
|
| }
|
|
|
| void InstructionSelector::VisitS16x8Shuffle(Node* node) {
|
| @@ -2715,13 +2735,20 @@ void InstructionSelector::VisitS16x8Shuffle(Node* node) {
|
| return;
|
| }
|
| ArmOperandGenerator g(this);
|
| + Node* input0 = node->InputAt(0);
|
| + Node* input1 = node->InputAt(1);
|
| uint8_t lanes = TryMatchConcat<8>(shuffle, mask);
|
| if (lanes != 0) {
|
| - Emit(kArmS8x16Concat, g.DefineAsRegister(node),
|
| - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
| - g.UseImmediate(lanes * 2));
|
| + Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
|
| + g.UseRegister(input1), g.UseImmediate(lanes * 2));
|
| + return;
|
| }
|
| - // TODO(bbudge) vtbl to handle all other shuffles.
|
| + // Code generator uses vtbl, arrange sources to form a valid lookup table.
|
| + InstructionOperand src0, src1;
|
| + ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
|
| + Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1,
|
| + g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
| + g.UseImmediate(Pack4Lanes(shuffle + 4, mask)));
|
| }
|
|
|
| void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
| @@ -2734,13 +2761,22 @@ void InstructionSelector::VisitS8x16Shuffle(Node* node) {
|
| return;
|
| }
|
| ArmOperandGenerator g(this);
|
| + Node* input0 = node->InputAt(0);
|
| + Node* input1 = node->InputAt(1);
|
| uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
|
| if (lanes != 0) {
|
| - Emit(kArmS8x16Concat, g.DefineAsRegister(node),
|
| - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
|
| - g.UseImmediate(lanes));
|
| + Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
|
| + g.UseRegister(input1), g.UseImmediate(lanes));
|
| + return;
|
| }
|
| - // TODO(bbudge) vtbl to handle all other shuffles.
|
| + // Code generator uses vtbl, arrange sources to form a valid lookup table.
|
| + InstructionOperand src0, src1;
|
| + ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
|
| + Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1,
|
| + g.UseImmediate(Pack4Lanes(shuffle, mask)),
|
| + g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
|
| + g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
|
| + g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
|
| }
|
|
|
| void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
|
|
|