Chromium Code Reviews| Index: src/compiler/arm/instruction-selector-arm.cc |
| diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc |
| index 83a977867b6d6659206e0f365874031e46b6c171..846ec80edc9181ee9c0c3ad903ced6e3e739a45e 100644 |
| --- a/src/compiler/arm/instruction-selector-arm.cc |
| +++ b/src/compiler/arm/instruction-selector-arm.cc |
| @@ -2534,33 +2534,69 @@ void InstructionSelector::VisitS128Select(Node* node) { |
| g.UseRegister(node->InputAt(2))); |
| } |
| +// Tries to match 8x16 byte shuffle to equivalent 32x4 word shuffle. |
| +bool TryMatch32x4Shuffle(const uint8_t* shuffle, uint8_t* shuffle32x4) { |
| + for (int i = 0; i < 4; i++) { |
|
Mircea Trofin
2017/06/13 22:21:45
is there a constant instead of "4" (could be more
bbudge
2017/06/13 23:07:54
Done. Defined constants kLanes and kLaneSize, both
|
| + if (shuffle[i * 4] % 4 != 0) return false; |
| + for (int j = 1; j < 4; j++) { |
|
Mircea Trofin
2017/06/13 22:21:45
++j
bbudge
2017/06/13 23:07:54
Done here and everywhere else.
|
| + if (shuffle[i * 4 + j] - shuffle[i * 4 + j - 1] != 1) return false; |
| + } |
| + shuffle32x4[i] = shuffle[i * 4] / 4; |
| + } |
| + return true; |
| +} |
| + |
| +// Tries to match byte shuffle to concatenate (vext) operation. |
| +bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask, uint8_t* bias) { |
| + uint8_t start = shuffle[0]; |
| + int i = 1; |
| + for (; i < 16 - start; i++) { |
|
Mircea Trofin
2017/06/13 22:21:45
++i
also, is there a constant for 16?
Could we g
bbudge
2017/06/13 23:07:54
Done, used kSimd128Size.
|
| + if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false; |
| + } |
| + uint8_t wrap = 16; |
| + for (; i < 16; i++, wrap++) { |
| + if ((shuffle[i] & mask) != (wrap & mask)) return false; |
| + } |
| + *bias = start; |
| + return true; |
| +} |
| + |
| namespace { |
| -template <int LANES> |
| struct ShuffleEntry { |
| - uint8_t shuffle[LANES]; |
| + uint8_t shuffle[16]; |
| ArchOpcode opcode; |
| }; |
| -static const ShuffleEntry<4> arch_s32x4_shuffles[] = { |
| - {{0, 4, 1, 5}, kArmS32x4ZipLeft}, |
| - {{2, 6, 3, 7}, kArmS32x4ZipRight}, |
| - {{0, 2, 4, 6}, kArmS32x4UnzipLeft}, |
| - {{1, 3, 5, 7}, kArmS32x4UnzipRight}, |
| - {{0, 4, 2, 6}, kArmS32x4TransposeLeft}, |
| - {{1, 5, 3, 7}, kArmS32x4TransposeRight}, |
| - {{1, 0, 3, 2}, kArmS32x2Reverse}}; |
| - |
| -static const ShuffleEntry<8> arch_s16x8_shuffles[] = { |
| - {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft}, |
| - {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight}, |
| - {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft}, |
| - {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight}, |
| - {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft}, |
| - {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight}, |
| - {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse}, |
| - {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}}; |
| - |
| -static const ShuffleEntry<16> arch_s8x16_shuffles[] = { |
| +static const ShuffleEntry arch_shuffles[] = { |
| + {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}, |
| + kArmS32x4ZipLeft}, |
| + {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}, |
| + kArmS32x4ZipRight}, |
| + {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}, |
| + kArmS32x4UnzipLeft}, |
| + {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}, |
| + kArmS32x4UnzipRight}, |
| + {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}, |
| + kArmS32x4TransposeLeft}, |
| + {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}, |
| + kArmS32x4TransposeRight}, |
| + {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, kArmS32x2Reverse}, |
| + |
| + {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}, |
| + kArmS16x8ZipLeft}, |
| + {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}, |
| + kArmS16x8ZipRight}, |
| + {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}, |
| + kArmS16x8UnzipLeft}, |
| + {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}, |
| + kArmS16x8UnzipRight}, |
| + {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}, |
| + kArmS16x8TransposeLeft}, |
| + {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}, |
| + kArmS16x8TransposeRight}, |
| + {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kArmS16x4Reverse}, |
| + {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kArmS16x2Reverse}, |
| + |
| {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, |
| kArmS8x16ZipLeft}, |
| {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, |
| @@ -2577,39 +2613,22 @@ static const ShuffleEntry<16> arch_s8x16_shuffles[] = { |
| {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, |
| {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}}; |
| -// Use a non-shuffle opcode to signal no match. |
| -static const ArchOpcode kNoShuffle = kArmS128Not; |
| - |
| -template <int LANES> |
| -ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle, |
| - const ShuffleEntry<LANES>* table, |
| - size_t num_entries, uint8_t mask) { |
| +bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, |
| + size_t num_entries, uint8_t mask, ArchOpcode* opcode) { |
| for (size_t i = 0; i < num_entries; i++) { |
| - const ShuffleEntry<LANES>& entry = table[i]; |
| + const ShuffleEntry& entry = table[i]; |
| int j = 0; |
| - for (; j < LANES; j++) { |
| + for (; j < 16; j++) { |
| if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { |
| break; |
| } |
| } |
| - if (j == LANES) return entry.opcode; |
| - } |
| - return kNoShuffle; |
| -} |
| - |
| -// Returns the bias if shuffle is a concatenation, 0 otherwise. |
| -template <int LANES> |
| -uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) { |
| - uint8_t start = shuffle[0]; |
| - int i = 1; |
| - for (; i < LANES - start; i++) { |
| - if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0; |
| - } |
| - uint8_t wrap = LANES; |
| - for (; i < LANES; i++, wrap++) { |
| - if ((shuffle[i] & mask) != (wrap & mask)) return 0; |
| + if (j == 16) { |
| + *opcode = entry.opcode; |
| + return true; |
| + } |
| } |
| - return start; |
| + return false; |
| } |
| // Canonicalize shuffles to make pattern matching simpler. Returns a mask that |
| @@ -2668,70 +2687,29 @@ void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1, |
| } // namespace |
| -void InstructionSelector::VisitS32x4Shuffle(Node* node) { |
| +void InstructionSelector::VisitS8x16Shuffle(Node* node) { |
| const uint8_t* shuffle = OpParameter<uint8_t*>(node); |
| - uint8_t mask = CanonicalizeShuffle(this, node, 4); |
| - ArchOpcode opcode = TryMatchArchShuffle<4>( |
| - shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask); |
| - if (opcode != kNoShuffle) { |
| - VisitRRRShuffle(this, opcode, node); |
| - return; |
| - } |
| + uint8_t mask = CanonicalizeShuffle(this, node, 16); |
| + uint8_t shuffle32x4[4]; |
| ArmOperandGenerator g(this); |
| - uint8_t lanes = TryMatchConcat<4>(shuffle, mask); |
| - if (lanes != 0) { |
| - Emit(kArmS8x16Concat, g.DefineAsRegister(node), |
| + if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { |
| + Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), |
| g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
| - g.UseImmediate(lanes * 4)); |
| + g.UseImmediate(Pack4Lanes(shuffle32x4, mask))); |
| return; |
| } |
| - Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), |
| - g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
| - g.UseImmediate(Pack4Lanes(shuffle, mask))); |
| -} |
| - |
| -void InstructionSelector::VisitS16x8Shuffle(Node* node) { |
| - const uint8_t* shuffle = OpParameter<uint8_t*>(node); |
| - uint8_t mask = CanonicalizeShuffle(this, node, 8); |
| - ArchOpcode opcode = TryMatchArchShuffle<8>( |
| - shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask); |
| - if (opcode != kNoShuffle) { |
| - VisitRRRShuffle(this, opcode, node); |
| - return; |
| - } |
| - ArmOperandGenerator g(this); |
| - Node* input0 = node->InputAt(0); |
| - Node* input1 = node->InputAt(1); |
| - uint8_t lanes = TryMatchConcat<8>(shuffle, mask); |
| - if (lanes != 0) { |
| - Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), |
| - g.UseRegister(input1), g.UseImmediate(lanes * 2)); |
| - return; |
| - } |
| - // Code generator uses vtbl, arrange sources to form a valid lookup table. |
| - InstructionOperand src0, src1; |
| - ArrangeShuffleTable(&g, input0, input1, &src0, &src1); |
| - Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1, |
| - g.UseImmediate(Pack4Lanes(shuffle, mask)), |
| - g.UseImmediate(Pack4Lanes(shuffle + 4, mask))); |
| -} |
| - |
| -void InstructionSelector::VisitS8x16Shuffle(Node* node) { |
| - const uint8_t* shuffle = OpParameter<uint8_t*>(node); |
| - uint8_t mask = CanonicalizeShuffle(this, node, 16); |
| - ArchOpcode opcode = TryMatchArchShuffle<16>( |
| - shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask); |
| - if (opcode != kNoShuffle) { |
| + ArchOpcode opcode; |
| + if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), |
| + mask, &opcode)) { |
| VisitRRRShuffle(this, opcode, node); |
| return; |
| } |
| - ArmOperandGenerator g(this); |
| Node* input0 = node->InputAt(0); |
| Node* input1 = node->InputAt(1); |
| - uint8_t lanes = TryMatchConcat<16>(shuffle, mask); |
| - if (lanes != 0) { |
| + uint8_t bias; |
| + if (TryMatchConcat(shuffle, mask, &bias)) { |
| Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), |
| - g.UseRegister(input1), g.UseImmediate(lanes)); |
| + g.UseRegister(input1), g.UseImmediate(bias)); |
| return; |
| } |
| // Code generator uses vtbl, arrange sources to form a valid lookup table. |