OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/base/adapters.h" | 5 #include "src/base/adapters.h" |
6 #include "src/base/bits.h" | 6 #include "src/base/bits.h" |
7 #include "src/compiler/instruction-selector-impl.h" | 7 #include "src/compiler/instruction-selector-impl.h" |
8 #include "src/compiler/node-matchers.h" | 8 #include "src/compiler/node-matchers.h" |
9 #include "src/compiler/node-properties.h" | 9 #include "src/compiler/node-properties.h" |
10 | 10 |
(...skipping 2516 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2527 SIMD_BINOP_LIST(SIMD_VISIT_BINOP) | 2527 SIMD_BINOP_LIST(SIMD_VISIT_BINOP) |
2528 #undef SIMD_VISIT_BINOP | 2528 #undef SIMD_VISIT_BINOP |
2529 | 2529 |
2530 void InstructionSelector::VisitS128Select(Node* node) { | 2530 void InstructionSelector::VisitS128Select(Node* node) { |
2531 ArmOperandGenerator g(this); | 2531 ArmOperandGenerator g(this); |
2532 Emit(kArmS128Select, g.DefineSameAsFirst(node), | 2532 Emit(kArmS128Select, g.DefineSameAsFirst(node), |
2533 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | 2533 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
2534 g.UseRegister(node->InputAt(2))); | 2534 g.UseRegister(node->InputAt(2))); |
2535 } | 2535 } |
2536 | 2536 |
2537 // Tries to match 8x16 byte shuffle to equivalent 32x4 word shuffle. | |
2538 bool TryMatch32x4Shuffle(const uint8_t* shuffle, uint8_t* shuffle32x4) { | |
2539 for (int i = 0; i < 4; i++) { | |
Mircea Trofin
2017/06/13 22:21:45
is there a constant instead of "4" (could be more
bbudge
2017/06/13 23:07:54
Done. Defined constants kLanes and kLaneSize, both
| |
2540 if (shuffle[i * 4] % 4 != 0) return false; | |
2541 for (int j = 1; j < 4; j++) { | |
Mircea Trofin
2017/06/13 22:21:45
++j
bbudge
2017/06/13 23:07:54
Done here and everywhere else.
| |
2542 if (shuffle[i * 4 + j] - shuffle[i * 4 + j - 1] != 1) return false; | |
2543 } | |
2544 shuffle32x4[i] = shuffle[i * 4] / 4; | |
2545 } | |
2546 return true; | |
2547 } | |
2548 | |
2549 // Tries to match byte shuffle to concatenate (vext) operation. | |
2550 bool TryMatchConcat(const uint8_t* shuffle, uint8_t mask, uint8_t* bias) { | |
2551 uint8_t start = shuffle[0]; | |
2552 int i = 1; | |
2553 for (; i < 16 - start; i++) { | |
Mircea Trofin
2017/06/13 22:21:45
++i
also, is there a constant for 16?
Could we g
bbudge
2017/06/13 23:07:54
Done, used kSimd128Size.
| |
2554 if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return false; | |
2555 } | |
2556 uint8_t wrap = 16; | |
2557 for (; i < 16; i++, wrap++) { | |
2558 if ((shuffle[i] & mask) != (wrap & mask)) return false; | |
2559 } | |
2560 *bias = start; | |
2561 return true; | |
2562 } | |
2563 | |
2537 namespace { | 2564 namespace { |
2538 template <int LANES> | |
2539 struct ShuffleEntry { | 2565 struct ShuffleEntry { |
2540 uint8_t shuffle[LANES]; | 2566 uint8_t shuffle[16]; |
2541 ArchOpcode opcode; | 2567 ArchOpcode opcode; |
2542 }; | 2568 }; |
2543 | 2569 |
2544 static const ShuffleEntry<4> arch_s32x4_shuffles[] = { | 2570 static const ShuffleEntry arch_shuffles[] = { |
2545 {{0, 4, 1, 5}, kArmS32x4ZipLeft}, | 2571 {{0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}, |
2546 {{2, 6, 3, 7}, kArmS32x4ZipRight}, | 2572 kArmS32x4ZipLeft}, |
2547 {{0, 2, 4, 6}, kArmS32x4UnzipLeft}, | 2573 {{8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}, |
2548 {{1, 3, 5, 7}, kArmS32x4UnzipRight}, | 2574 kArmS32x4ZipRight}, |
2549 {{0, 4, 2, 6}, kArmS32x4TransposeLeft}, | 2575 {{0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}, |
2550 {{1, 5, 3, 7}, kArmS32x4TransposeRight}, | 2576 kArmS32x4UnzipLeft}, |
2551 {{1, 0, 3, 2}, kArmS32x2Reverse}}; | 2577 {{4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}, |
2578 kArmS32x4UnzipRight}, | |
2579 {{0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}, | |
2580 kArmS32x4TransposeLeft}, | |
2581 {{4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}, | |
2582 kArmS32x4TransposeRight}, | |
2583 {{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, kArmS32x2Reverse}, | |
2552 | 2584 |
2553 static const ShuffleEntry<8> arch_s16x8_shuffles[] = { | 2585 {{0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}, |
2554 {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft}, | 2586 kArmS16x8ZipLeft}, |
2555 {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight}, | 2587 {{8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}, |
2556 {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft}, | 2588 kArmS16x8ZipRight}, |
2557 {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight}, | 2589 {{0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}, |
2558 {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft}, | 2590 kArmS16x8UnzipLeft}, |
2559 {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight}, | 2591 {{2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}, |
2560 {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse}, | 2592 kArmS16x8UnzipRight}, |
2561 {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}}; | 2593 {{0, 1, 16, 17, 4, 5, 20, 21, 8, 9, 24, 25, 12, 13, 28, 29}, |
2594 kArmS16x8TransposeLeft}, | |
2595 {{2, 3, 18, 19, 6, 7, 22, 23, 10, 11, 26, 27, 14, 15, 30, 31}, | |
2596 kArmS16x8TransposeRight}, | |
2597 {{6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9}, kArmS16x4Reverse}, | |
2598 {{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, kArmS16x2Reverse}, | |
2562 | 2599 |
2563 static const ShuffleEntry<16> arch_s8x16_shuffles[] = { | |
2564 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, | 2600 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, |
2565 kArmS8x16ZipLeft}, | 2601 kArmS8x16ZipLeft}, |
2566 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, | 2602 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, |
2567 kArmS8x16ZipRight}, | 2603 kArmS8x16ZipRight}, |
2568 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, | 2604 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, |
2569 kArmS8x16UnzipLeft}, | 2605 kArmS8x16UnzipLeft}, |
2570 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, | 2606 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, |
2571 kArmS8x16UnzipRight}, | 2607 kArmS8x16UnzipRight}, |
2572 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, | 2608 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, |
2573 kArmS8x16TransposeLeft}, | 2609 kArmS8x16TransposeLeft}, |
2574 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, | 2610 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, |
2575 kArmS8x16TransposeRight}, | 2611 kArmS8x16TransposeRight}, |
2576 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse}, | 2612 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse}, |
2577 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, | 2613 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, |
2578 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}}; | 2614 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}}; |
2579 | 2615 |
2580 // Use a non-shuffle opcode to signal no match. | 2616 bool TryMatchArchShuffle(const uint8_t* shuffle, const ShuffleEntry* table, |
2581 static const ArchOpcode kNoShuffle = kArmS128Not; | 2617 size_t num_entries, uint8_t mask, ArchOpcode* opcode) { |
2582 | |
2583 template <int LANES> | |
2584 ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle, | |
2585 const ShuffleEntry<LANES>* table, | |
2586 size_t num_entries, uint8_t mask) { | |
2587 for (size_t i = 0; i < num_entries; i++) { | 2618 for (size_t i = 0; i < num_entries; i++) { |
2588 const ShuffleEntry<LANES>& entry = table[i]; | 2619 const ShuffleEntry& entry = table[i]; |
2589 int j = 0; | 2620 int j = 0; |
2590 for (; j < LANES; j++) { | 2621 for (; j < 16; j++) { |
2591 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { | 2622 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { |
2592 break; | 2623 break; |
2593 } | 2624 } |
2594 } | 2625 } |
2595 if (j == LANES) return entry.opcode; | 2626 if (j == 16) { |
2627 *opcode = entry.opcode; | |
2628 return true; | |
2629 } | |
2596 } | 2630 } |
2597 return kNoShuffle; | 2631 return false; |
2598 } | |
2599 | |
2600 // Returns the bias if shuffle is a concatenation, 0 otherwise. | |
2601 template <int LANES> | |
2602 uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) { | |
2603 uint8_t start = shuffle[0]; | |
2604 int i = 1; | |
2605 for (; i < LANES - start; i++) { | |
2606 if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0; | |
2607 } | |
2608 uint8_t wrap = LANES; | |
2609 for (; i < LANES; i++, wrap++) { | |
2610 if ((shuffle[i] & mask) != (wrap & mask)) return 0; | |
2611 } | |
2612 return start; | |
2613 } | 2632 } |
2614 | 2633 |
2615 // Canonicalize shuffles to make pattern matching simpler. Returns a mask that | 2634 // Canonicalize shuffles to make pattern matching simpler. Returns a mask that |
2616 // will ignore the high bit of indices in some cases. | 2635 // will ignore the high bit of indices in some cases. |
2617 uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node, | 2636 uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node, |
2618 int num_lanes) { | 2637 int num_lanes) { |
2619 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | 2638 const uint8_t* shuffle = OpParameter<uint8_t*>(node); |
2620 uint8_t mask = 0xff; | 2639 uint8_t mask = 0xff; |
2621 // If shuffle is unary, set 'mask' to ignore the high bit of the indices. | 2640 // If shuffle is unary, set 'mask' to ignore the high bit of the indices. |
2622 // Replace any unused source with the other. | 2641 // Replace any unused source with the other. |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2661 *src0 = *src1 = g->UseRegister(input0); | 2680 *src0 = *src1 = g->UseRegister(input0); |
2662 } else { | 2681 } else { |
2663 // Binary, table registers must be consecutive. | 2682 // Binary, table registers must be consecutive. |
2664 *src0 = g->UseFixed(input0, q0); | 2683 *src0 = g->UseFixed(input0, q0); |
2665 *src1 = g->UseFixed(input1, q1); | 2684 *src1 = g->UseFixed(input1, q1); |
2666 } | 2685 } |
2667 } | 2686 } |
2668 | 2687 |
2669 } // namespace | 2688 } // namespace |
2670 | 2689 |
2671 void InstructionSelector::VisitS32x4Shuffle(Node* node) { | |
2672 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2673 uint8_t mask = CanonicalizeShuffle(this, node, 4); | |
2674 ArchOpcode opcode = TryMatchArchShuffle<4>( | |
2675 shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask); | |
2676 if (opcode != kNoShuffle) { | |
2677 VisitRRRShuffle(this, opcode, node); | |
2678 return; | |
2679 } | |
2680 ArmOperandGenerator g(this); | |
2681 uint8_t lanes = TryMatchConcat<4>(shuffle, mask); | |
2682 if (lanes != 0) { | |
2683 Emit(kArmS8x16Concat, g.DefineAsRegister(node), | |
2684 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | |
2685 g.UseImmediate(lanes * 4)); | |
2686 return; | |
2687 } | |
2688 Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), | |
2689 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | |
2690 g.UseImmediate(Pack4Lanes(shuffle, mask))); | |
2691 } | |
2692 | |
2693 void InstructionSelector::VisitS16x8Shuffle(Node* node) { | |
2694 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2695 uint8_t mask = CanonicalizeShuffle(this, node, 8); | |
2696 ArchOpcode opcode = TryMatchArchShuffle<8>( | |
2697 shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask); | |
2698 if (opcode != kNoShuffle) { | |
2699 VisitRRRShuffle(this, opcode, node); | |
2700 return; | |
2701 } | |
2702 ArmOperandGenerator g(this); | |
2703 Node* input0 = node->InputAt(0); | |
2704 Node* input1 = node->InputAt(1); | |
2705 uint8_t lanes = TryMatchConcat<8>(shuffle, mask); | |
2706 if (lanes != 0) { | |
2707 Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), | |
2708 g.UseRegister(input1), g.UseImmediate(lanes * 2)); | |
2709 return; | |
2710 } | |
2711 // Code generator uses vtbl, arrange sources to form a valid lookup table. | |
2712 InstructionOperand src0, src1; | |
2713 ArrangeShuffleTable(&g, input0, input1, &src0, &src1); | |
2714 Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1, | |
2715 g.UseImmediate(Pack4Lanes(shuffle, mask)), | |
2716 g.UseImmediate(Pack4Lanes(shuffle + 4, mask))); | |
2717 } | |
2718 | |
2719 void InstructionSelector::VisitS8x16Shuffle(Node* node) { | 2690 void InstructionSelector::VisitS8x16Shuffle(Node* node) { |
2720 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | 2691 const uint8_t* shuffle = OpParameter<uint8_t*>(node); |
2721 uint8_t mask = CanonicalizeShuffle(this, node, 16); | 2692 uint8_t mask = CanonicalizeShuffle(this, node, 16); |
2722 ArchOpcode opcode = TryMatchArchShuffle<16>( | 2693 uint8_t shuffle32x4[4]; |
2723 shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask); | 2694 ArmOperandGenerator g(this); |
2724 if (opcode != kNoShuffle) { | 2695 if (TryMatch32x4Shuffle(shuffle, shuffle32x4)) { |
2696 Emit(kArmS32x4Shuffle, g.DefineAsRegister(node), | |
2697 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | |
2698 g.UseImmediate(Pack4Lanes(shuffle32x4, mask))); | |
2699 return; | |
2700 } | |
2701 ArchOpcode opcode; | |
2702 if (TryMatchArchShuffle(shuffle, arch_shuffles, arraysize(arch_shuffles), | |
2703 mask, &opcode)) { | |
2725 VisitRRRShuffle(this, opcode, node); | 2704 VisitRRRShuffle(this, opcode, node); |
2726 return; | 2705 return; |
2727 } | 2706 } |
2728 ArmOperandGenerator g(this); | |
2729 Node* input0 = node->InputAt(0); | 2707 Node* input0 = node->InputAt(0); |
2730 Node* input1 = node->InputAt(1); | 2708 Node* input1 = node->InputAt(1); |
2731 uint8_t lanes = TryMatchConcat<16>(shuffle, mask); | 2709 uint8_t bias; |
2732 if (lanes != 0) { | 2710 if (TryMatchConcat(shuffle, mask, &bias)) { |
2733 Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), | 2711 Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0), |
2734 g.UseRegister(input1), g.UseImmediate(lanes)); | 2712 g.UseRegister(input1), g.UseImmediate(bias)); |
2735 return; | 2713 return; |
2736 } | 2714 } |
2737 // Code generator uses vtbl, arrange sources to form a valid lookup table. | 2715 // Code generator uses vtbl, arrange sources to form a valid lookup table. |
2738 InstructionOperand src0, src1; | 2716 InstructionOperand src0, src1; |
2739 ArrangeShuffleTable(&g, input0, input1, &src0, &src1); | 2717 ArrangeShuffleTable(&g, input0, input1, &src0, &src1); |
2740 Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1, | 2718 Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1, |
2741 g.UseImmediate(Pack4Lanes(shuffle, mask)), | 2719 g.UseImmediate(Pack4Lanes(shuffle, mask)), |
2742 g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), | 2720 g.UseImmediate(Pack4Lanes(shuffle + 4, mask)), |
2743 g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), | 2721 g.UseImmediate(Pack4Lanes(shuffle + 8, mask)), |
2744 g.UseImmediate(Pack4Lanes(shuffle + 12, mask))); | 2722 g.UseImmediate(Pack4Lanes(shuffle + 12, mask))); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2785 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); | 2763 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); |
2786 req_aligned[0] = MachineType::Float32(); | 2764 req_aligned[0] = MachineType::Float32(); |
2787 req_aligned[1] = MachineType::Float64(); | 2765 req_aligned[1] = MachineType::Float64(); |
2788 return MachineOperatorBuilder::AlignmentRequirements:: | 2766 return MachineOperatorBuilder::AlignmentRequirements:: |
2789 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); | 2767 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); |
2790 } | 2768 } |
2791 | 2769 |
2792 } // namespace compiler | 2770 } // namespace compiler |
2793 } // namespace internal | 2771 } // namespace internal |
2794 } // namespace v8 | 2772 } // namespace v8 |
OLD | NEW |