Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(291)

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2856363003: [ARM] Implement irregular vector shuffles for SIMD. (Closed)
Patch Set: Review comments. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | test/cctest/wasm/test-run-wasm-simd.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/base/adapters.h" 5 #include "src/base/adapters.h"
6 #include "src/base/bits.h" 6 #include "src/base/bits.h"
7 #include "src/compiler/instruction-selector-impl.h" 7 #include "src/compiler/instruction-selector-impl.h"
8 #include "src/compiler/node-matchers.h" 8 #include "src/compiler/node-matchers.h"
9 #include "src/compiler/node-properties.h" 9 #include "src/compiler/node-properties.h"
10 10
(...skipping 2565 matching lines...) Expand 10 before | Expand all | Expand 10 after
2576 ArchOpcode opcode; 2576 ArchOpcode opcode;
2577 }; 2577 };
2578 2578
2579 static const ShuffleEntry<4> arch_s32x4_shuffles[] = { 2579 static const ShuffleEntry<4> arch_s32x4_shuffles[] = {
2580 {{0, 4, 1, 5}, kArmS32x4ZipLeft}, 2580 {{0, 4, 1, 5}, kArmS32x4ZipLeft},
2581 {{2, 6, 3, 7}, kArmS32x4ZipRight}, 2581 {{2, 6, 3, 7}, kArmS32x4ZipRight},
2582 {{0, 2, 4, 6}, kArmS32x4UnzipLeft}, 2582 {{0, 2, 4, 6}, kArmS32x4UnzipLeft},
2583 {{1, 3, 5, 7}, kArmS32x4UnzipRight}, 2583 {{1, 3, 5, 7}, kArmS32x4UnzipRight},
2584 {{0, 4, 2, 6}, kArmS32x4TransposeLeft}, 2584 {{0, 4, 2, 6}, kArmS32x4TransposeLeft},
2585 {{1, 5, 3, 7}, kArmS32x4TransposeRight}, 2585 {{1, 5, 3, 7}, kArmS32x4TransposeRight},
2586 {{1, 0, 3, 2}, kArmS32x2Reverse}, 2586 {{1, 0, 3, 2}, kArmS32x2Reverse}};
2587 };
2588 2587
2589 static const ShuffleEntry<8> arch_s16x8_shuffles[] = { 2588 static const ShuffleEntry<8> arch_s16x8_shuffles[] = {
2590 {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft}, 2589 {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft},
2591 {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight}, 2590 {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight},
2592 {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft}, 2591 {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft},
2593 {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight}, 2592 {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight},
2594 {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft}, 2593 {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft},
2595 {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight}, 2594 {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight},
2596 {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse}, 2595 {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse},
2597 {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}, 2596 {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}};
2598 };
2599 2597
2600 static const ShuffleEntry<16> arch_s8x16_shuffles[] = { 2598 static const ShuffleEntry<16> arch_s8x16_shuffles[] = {
2601 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, 2599 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23},
2602 kArmS8x16ZipLeft}, 2600 kArmS8x16ZipLeft},
2603 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, 2601 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31},
2604 kArmS8x16ZipRight}, 2602 kArmS8x16ZipRight},
2605 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, 2603 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30},
2606 kArmS8x16UnzipLeft}, 2604 kArmS8x16UnzipLeft},
2607 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, 2605 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31},
2608 kArmS8x16UnzipRight}, 2606 kArmS8x16UnzipRight},
2609 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, 2607 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30},
2610 kArmS8x16TransposeLeft}, 2608 kArmS8x16TransposeLeft},
2611 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, 2609 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31},
2612 kArmS8x16TransposeRight}, 2610 kArmS8x16TransposeRight},
2613 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse}, 2611 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse},
2614 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, 2612 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse},
2615 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}, 2613 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}};
2616 };
2617 2614
2618 // Use a non-shuffle opcode to signal no match. 2615 // Use a non-shuffle opcode to signal no match.
2619 static const ArchOpcode kNoShuffle = kArmS128Not; 2616 static const ArchOpcode kNoShuffle = kArmS128Not;
2620 2617
2621 template <int LANES> 2618 template <int LANES>
2622 ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle, 2619 ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle,
2623 const ShuffleEntry<LANES>* table, 2620 const ShuffleEntry<LANES>* table,
2624 size_t num_entries, uint8_t mask) { 2621 size_t num_entries, uint8_t mask) {
2625 for (size_t i = 0; i < num_entries; i++) { 2622 for (size_t i = 0; i < num_entries; i++) {
2626 const ShuffleEntry<LANES>& entry = table[i]; 2623 const ShuffleEntry<LANES>& entry = table[i];
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
2676 node->ReplaceInput(1, node->InputAt(0)); 2673 node->ReplaceInput(1, node->InputAt(0));
2677 mask = num_lanes - 1; 2674 mask = num_lanes - 1;
2678 } else if (src1_is_used && !src0_is_used) { 2675 } else if (src1_is_used && !src0_is_used) {
2679 node->ReplaceInput(0, node->InputAt(1)); 2676 node->ReplaceInput(0, node->InputAt(1));
2680 mask = num_lanes - 1; 2677 mask = num_lanes - 1;
2681 } 2678 }
2682 } 2679 }
2683 return mask; 2680 return mask;
2684 } 2681 }
2685 2682
2683 int32_t Pack4Lanes(const uint8_t* shuffle, uint8_t mask) {
2684 int32_t result = 0;
2685 for (int i = 3; i >= 0; i--) {
2686 result <<= 8;
2687 result |= shuffle[i] & mask;
2688 }
2689 return result;
2690 }
2691
2692 void ArrangeShuffleTable(ArmOperandGenerator* g, Node* input0, Node* input1,
2693 InstructionOperand* src0, InstructionOperand* src1) {
2694 if (input0 == input1) {
2695 // Unary, any q-register can be the table.
2696 *src0 = *src1 = g->UseRegister(input0);
2697 } else {
2698 // Binary, table registers must be consecutive.
2699 *src0 = g->UseFixed(input0, q0);
2700 *src1 = g->UseFixed(input1, q1);
2701 }
2702 }
2703
2686 } // namespace 2704 } // namespace
2687 2705
2688 void InstructionSelector::VisitS32x4Shuffle(Node* node) { 2706 void InstructionSelector::VisitS32x4Shuffle(Node* node) {
2689 const uint8_t* shuffle = OpParameter<uint8_t*>(node); 2707 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2690 uint8_t mask = CanonicalizeShuffle(this, node, 4); 2708 uint8_t mask = CanonicalizeShuffle(this, node, 4);
2691 ArchOpcode opcode = TryMatchArchShuffle<4>( 2709 ArchOpcode opcode = TryMatchArchShuffle<4>(
2692 shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask); 2710 shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask);
2693 if (opcode != kNoShuffle) { 2711 if (opcode != kNoShuffle) {
2694 VisitRRRShuffle(this, opcode, node); 2712 VisitRRRShuffle(this, opcode, node);
2695 return; 2713 return;
2696 } 2714 }
2697 ArmOperandGenerator g(this); 2715 ArmOperandGenerator g(this);
2698 uint8_t lanes = TryMatchConcat<4>(shuffle, mask); 2716 uint8_t lanes = TryMatchConcat<4>(shuffle, mask);
2699 if (lanes != 0) { 2717 if (lanes != 0) {
2700 Emit(kArmS8x16Concat, g.DefineAsRegister(node), 2718 Emit(kArmS8x16Concat, g.DefineAsRegister(node),
2701 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), 2719 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
2702 g.UseImmediate(lanes * 4)); 2720 g.UseImmediate(lanes * 4));
2703 return; 2721 return;
2704 } 2722 }
2705 // TODO(bbudge) vtbl to handle all other shuffles. 2723 Emit(kArmS32x4Shuffle, g.DefineAsRegister(node),
2724 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)),
2725 g.UseImmediate(Pack4Lanes(shuffle, mask)));
2706 } 2726 }
2707 2727
2708 void InstructionSelector::VisitS16x8Shuffle(Node* node) { 2728 void InstructionSelector::VisitS16x8Shuffle(Node* node) {
2709 const uint8_t* shuffle = OpParameter<uint8_t*>(node); 2729 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2710 uint8_t mask = CanonicalizeShuffle(this, node, 8); 2730 uint8_t mask = CanonicalizeShuffle(this, node, 8);
2711 ArchOpcode opcode = TryMatchArchShuffle<8>( 2731 ArchOpcode opcode = TryMatchArchShuffle<8>(
2712 shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask); 2732 shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask);
2713 if (opcode != kNoShuffle) { 2733 if (opcode != kNoShuffle) {
2714 VisitRRRShuffle(this, opcode, node); 2734 VisitRRRShuffle(this, opcode, node);
2715 return; 2735 return;
2716 } 2736 }
2717 ArmOperandGenerator g(this); 2737 ArmOperandGenerator g(this);
2738 Node* input0 = node->InputAt(0);
2739 Node* input1 = node->InputAt(1);
2718 uint8_t lanes = TryMatchConcat<8>(shuffle, mask); 2740 uint8_t lanes = TryMatchConcat<8>(shuffle, mask);
2719 if (lanes != 0) { 2741 if (lanes != 0) {
2720 Emit(kArmS8x16Concat, g.DefineAsRegister(node), 2742 Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
2721 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), 2743 g.UseRegister(input1), g.UseImmediate(lanes * 2));
2722 g.UseImmediate(lanes * 2)); 2744 return;
2723 } 2745 }
2724 // TODO(bbudge) vtbl to handle all other shuffles. 2746 // Code generator uses vtbl, arrange sources to form a valid lookup table.
2747 InstructionOperand src0, src1;
2748 ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
2749 Emit(kArmS16x8Shuffle, g.DefineAsRegister(node), src0, src1,
2750 g.UseImmediate(Pack4Lanes(shuffle, mask)),
2751 g.UseImmediate(Pack4Lanes(shuffle + 4, mask)));
2725 } 2752 }
2726 2753
2727 void InstructionSelector::VisitS8x16Shuffle(Node* node) { 2754 void InstructionSelector::VisitS8x16Shuffle(Node* node) {
2728 const uint8_t* shuffle = OpParameter<uint8_t*>(node); 2755 const uint8_t* shuffle = OpParameter<uint8_t*>(node);
2729 uint8_t mask = CanonicalizeShuffle(this, node, 16); 2756 uint8_t mask = CanonicalizeShuffle(this, node, 16);
2730 ArchOpcode opcode = TryMatchArchShuffle<16>( 2757 ArchOpcode opcode = TryMatchArchShuffle<16>(
2731 shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask); 2758 shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask);
2732 if (opcode != kNoShuffle) { 2759 if (opcode != kNoShuffle) {
2733 VisitRRRShuffle(this, opcode, node); 2760 VisitRRRShuffle(this, opcode, node);
2734 return; 2761 return;
2735 } 2762 }
2736 ArmOperandGenerator g(this); 2763 ArmOperandGenerator g(this);
2764 Node* input0 = node->InputAt(0);
2765 Node* input1 = node->InputAt(1);
2737 uint8_t lanes = TryMatchConcat<16>(shuffle, mask); 2766 uint8_t lanes = TryMatchConcat<16>(shuffle, mask);
2738 if (lanes != 0) { 2767 if (lanes != 0) {
2739 Emit(kArmS8x16Concat, g.DefineAsRegister(node), 2768 Emit(kArmS8x16Concat, g.DefineAsRegister(node), g.UseRegister(input0),
2740 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), 2769 g.UseRegister(input1), g.UseImmediate(lanes));
2741 g.UseImmediate(lanes)); 2770 return;
2742 } 2771 }
2743 // TODO(bbudge) vtbl to handle all other shuffles. 2772 // Code generator uses vtbl, arrange sources to form a valid lookup table.
2773 InstructionOperand src0, src1;
2774 ArrangeShuffleTable(&g, input0, input1, &src0, &src1);
2775 Emit(kArmS8x16Shuffle, g.DefineAsRegister(node), src0, src1,
2776 g.UseImmediate(Pack4Lanes(shuffle, mask)),
2777 g.UseImmediate(Pack4Lanes(shuffle + 4, mask)),
2778 g.UseImmediate(Pack4Lanes(shuffle + 8, mask)),
2779 g.UseImmediate(Pack4Lanes(shuffle + 12, mask)));
2744 } 2780 }
2745 2781
2746 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { 2782 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
2747 UNREACHABLE(); 2783 UNREACHABLE();
2748 } 2784 }
2749 2785
2750 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { 2786 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
2751 UNREACHABLE(); 2787 UNREACHABLE();
2752 } 2788 }
2753 2789
(...skipping 30 matching lines...) Expand all
2784 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); 2820 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);
2785 req_aligned[0] = MachineType::Float32(); 2821 req_aligned[0] = MachineType::Float32();
2786 req_aligned[1] = MachineType::Float64(); 2822 req_aligned[1] = MachineType::Float64();
2787 return MachineOperatorBuilder::AlignmentRequirements:: 2823 return MachineOperatorBuilder::AlignmentRequirements::
2788 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); 2824 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
2789 } 2825 }
2790 2826
2791 } // namespace compiler 2827 } // namespace compiler
2792 } // namespace internal 2828 } // namespace internal
2793 } // namespace v8 2829 } // namespace v8
OLD | NEW
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | test/cctest/wasm/test-run-wasm-simd.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698