OLD | NEW |
---|---|
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/base/adapters.h" | 5 #include "src/base/adapters.h" |
6 #include "src/base/bits.h" | 6 #include "src/base/bits.h" |
7 #include "src/compiler/instruction-selector-impl.h" | 7 #include "src/compiler/instruction-selector-impl.h" |
8 #include "src/compiler/node-matchers.h" | 8 #include "src/compiler/node-matchers.h" |
9 #include "src/compiler/node-properties.h" | 9 #include "src/compiler/node-properties.h" |
10 | 10 |
(...skipping 2396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2407 V(I32x4UConvertF32x4, kArmI32x4UConvertF32x4) \ | 2407 V(I32x4UConvertF32x4, kArmI32x4UConvertF32x4) \ |
2408 V(I32x4UConvertI16x8Low, kArmI32x4UConvertI16x8Low) \ | 2408 V(I32x4UConvertI16x8Low, kArmI32x4UConvertI16x8Low) \ |
2409 V(I32x4UConvertI16x8High, kArmI32x4UConvertI16x8High) \ | 2409 V(I32x4UConvertI16x8High, kArmI32x4UConvertI16x8High) \ |
2410 V(I16x8SConvertI8x16Low, kArmI16x8SConvertI8x16Low) \ | 2410 V(I16x8SConvertI8x16Low, kArmI16x8SConvertI8x16Low) \ |
2411 V(I16x8SConvertI8x16High, kArmI16x8SConvertI8x16High) \ | 2411 V(I16x8SConvertI8x16High, kArmI16x8SConvertI8x16High) \ |
2412 V(I16x8Neg, kArmI16x8Neg) \ | 2412 V(I16x8Neg, kArmI16x8Neg) \ |
2413 V(I16x8UConvertI8x16Low, kArmI16x8UConvertI8x16Low) \ | 2413 V(I16x8UConvertI8x16Low, kArmI16x8UConvertI8x16Low) \ |
2414 V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \ | 2414 V(I16x8UConvertI8x16High, kArmI16x8UConvertI8x16High) \ |
2415 V(I8x16Neg, kArmI8x16Neg) \ | 2415 V(I8x16Neg, kArmI8x16Neg) \ |
2416 V(S128Not, kArmS128Not) \ | 2416 V(S128Not, kArmS128Not) \ |
2417 V(S32x2Reverse, kArmS32x2Reverse) \ | |
2418 V(S16x4Reverse, kArmS16x4Reverse) \ | |
2419 V(S16x2Reverse, kArmS16x2Reverse) \ | |
2420 V(S8x8Reverse, kArmS8x8Reverse) \ | |
2421 V(S8x4Reverse, kArmS8x4Reverse) \ | |
2422 V(S8x2Reverse, kArmS8x2Reverse) \ | |
2423 V(S1x4Not, kArmS128Not) \ | 2417 V(S1x4Not, kArmS128Not) \ |
2424 V(S1x4AnyTrue, kArmS1x4AnyTrue) \ | 2418 V(S1x4AnyTrue, kArmS1x4AnyTrue) \ |
2425 V(S1x4AllTrue, kArmS1x4AllTrue) \ | 2419 V(S1x4AllTrue, kArmS1x4AllTrue) \ |
2426 V(S1x8Not, kArmS128Not) \ | 2420 V(S1x8Not, kArmS128Not) \ |
2427 V(S1x8AnyTrue, kArmS1x8AnyTrue) \ | 2421 V(S1x8AnyTrue, kArmS1x8AnyTrue) \ |
2428 V(S1x8AllTrue, kArmS1x8AllTrue) \ | 2422 V(S1x8AllTrue, kArmS1x8AllTrue) \ |
2429 V(S1x16Not, kArmS128Not) \ | 2423 V(S1x16Not, kArmS128Not) \ |
2430 V(S1x16AnyTrue, kArmS1x16AnyTrue) \ | 2424 V(S1x16AnyTrue, kArmS1x16AnyTrue) \ |
2431 V(S1x16AllTrue, kArmS1x16AllTrue) | 2425 V(S1x16AllTrue, kArmS1x16AllTrue) |
2432 | 2426 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2511 V(S1x4And, kArmS128And) \ | 2505 V(S1x4And, kArmS128And) \ |
2512 V(S1x4Or, kArmS128Or) \ | 2506 V(S1x4Or, kArmS128Or) \ |
2513 V(S1x4Xor, kArmS128Xor) \ | 2507 V(S1x4Xor, kArmS128Xor) \ |
2514 V(S1x8And, kArmS128And) \ | 2508 V(S1x8And, kArmS128And) \ |
2515 V(S1x8Or, kArmS128Or) \ | 2509 V(S1x8Or, kArmS128Or) \ |
2516 V(S1x8Xor, kArmS128Xor) \ | 2510 V(S1x8Xor, kArmS128Xor) \ |
2517 V(S1x16And, kArmS128And) \ | 2511 V(S1x16And, kArmS128And) \ |
2518 V(S1x16Or, kArmS128Or) \ | 2512 V(S1x16Or, kArmS128Or) \ |
2519 V(S1x16Xor, kArmS128Xor) | 2513 V(S1x16Xor, kArmS128Xor) |
2520 | 2514 |
2521 #define SIMD_SHUFFLE_OP_LIST(V) \ | |
2522 V(S32x4ZipLeft) \ | |
2523 V(S32x4ZipRight) \ | |
2524 V(S32x4UnzipLeft) \ | |
2525 V(S32x4UnzipRight) \ | |
2526 V(S32x4TransposeLeft) \ | |
2527 V(S32x4TransposeRight) \ | |
2528 V(S16x8ZipLeft) \ | |
2529 V(S16x8ZipRight) \ | |
2530 V(S16x8UnzipLeft) \ | |
2531 V(S16x8UnzipRight) \ | |
2532 V(S16x8TransposeLeft) \ | |
2533 V(S16x8TransposeRight) \ | |
2534 V(S8x16ZipLeft) \ | |
2535 V(S8x16ZipRight) \ | |
2536 V(S8x16UnzipLeft) \ | |
2537 V(S8x16UnzipRight) \ | |
2538 V(S8x16TransposeLeft) \ | |
2539 V(S8x16TransposeRight) | |
2540 | |
2541 #define SIMD_VISIT_SPLAT(Type) \ | 2515 #define SIMD_VISIT_SPLAT(Type) \ |
2542 void InstructionSelector::Visit##Type##Splat(Node* node) { \ | 2516 void InstructionSelector::Visit##Type##Splat(Node* node) { \ |
2543 VisitRR(this, kArm##Type##Splat, node); \ | 2517 VisitRR(this, kArm##Type##Splat, node); \ |
2544 } | 2518 } |
2545 SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) | 2519 SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) |
2546 #undef SIMD_VISIT_SPLAT | 2520 #undef SIMD_VISIT_SPLAT |
2547 | 2521 |
2548 #define SIMD_VISIT_EXTRACT_LANE(Type) \ | 2522 #define SIMD_VISIT_EXTRACT_LANE(Type) \ |
2549 void InstructionSelector::Visit##Type##ExtractLane(Node* node) { \ | 2523 void InstructionSelector::Visit##Type##ExtractLane(Node* node) { \ |
2550 VisitRRI(this, kArm##Type##ExtractLane, node); \ | 2524 VisitRRI(this, kArm##Type##ExtractLane, node); \ |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2588 SIMD_BINOP_LIST(SIMD_VISIT_BINOP) | 2562 SIMD_BINOP_LIST(SIMD_VISIT_BINOP) |
2589 #undef SIMD_VISIT_BINOP | 2563 #undef SIMD_VISIT_BINOP |
2590 | 2564 |
2591 #define SIMD_VISIT_SELECT_OP(format) \ | 2565 #define SIMD_VISIT_SELECT_OP(format) \ |
2592 void InstructionSelector::VisitS##format##Select(Node* node) { \ | 2566 void InstructionSelector::VisitS##format##Select(Node* node) { \ |
2593 VisitRRRR(this, kArmS128Select, node); \ | 2567 VisitRRRR(this, kArmS128Select, node); \ |
2594 } | 2568 } |
2595 SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP) | 2569 SIMD_FORMAT_LIST(SIMD_VISIT_SELECT_OP) |
2596 #undef SIMD_VISIT_SELECT_OP | 2570 #undef SIMD_VISIT_SELECT_OP |
2597 | 2571 |
2598 #define SIMD_VISIT_SHUFFLE_OP(Name) \ | 2572 namespace { |
2599 void InstructionSelector::Visit##Name(Node* node) { \ | 2573 template <int LANES> |
2600 VisitRRRShuffle(this, kArm##Name, node); \ | 2574 struct ShuffleEntry { |
2575 uint8_t shuffle[LANES]; | |
2576 ArchOpcode opcode; | |
2577 }; | |
2578 | |
2579 static const ShuffleEntry<4> arch_s32x4_shuffles[] = { | |
2580 {{0, 4, 1, 5}, kArmS32x4ZipLeft}, | |
martyn.capewell
2017/05/02 14:56:56
Odd suggestion: I wonder if the shuffle operation
bbudge
2017/05/02 22:59:27
I think these are the only patterns we'll be matching.
| |
2581 {{2, 6, 3, 7}, kArmS32x4ZipRight}, | |
2582 {{0, 2, 4, 6}, kArmS32x4UnzipLeft}, | |
2583 {{1, 3, 5, 7}, kArmS32x4UnzipRight}, | |
2584 {{0, 4, 2, 6}, kArmS32x4TransposeLeft}, | |
2585 {{1, 5, 3, 7}, kArmS32x4TransposeRight}, | |
2586 {{1, 0, 3, 2}, kArmS32x2Reverse}, | |
2587 }; | |
2588 | |
2589 static const ShuffleEntry<8> arch_s16x8_shuffles[] = { | |
2590 {{0, 8, 1, 9, 2, 10, 3, 11}, kArmS16x8ZipLeft}, | |
2591 {{4, 12, 5, 13, 6, 14, 7, 15}, kArmS16x8ZipRight}, | |
2592 {{0, 2, 4, 6, 8, 10, 12, 14}, kArmS16x8UnzipLeft}, | |
2593 {{1, 3, 5, 7, 9, 11, 13, 15}, kArmS16x8UnzipRight}, | |
2594 {{0, 8, 2, 10, 4, 12, 6, 14}, kArmS16x8TransposeLeft}, | |
2595 {{1, 9, 3, 11, 5, 13, 7, 15}, kArmS16x8TransposeRight}, | |
2596 {{3, 2, 1, 0, 7, 6, 5, 4}, kArmS16x4Reverse}, | |
2597 {{1, 0, 3, 2, 5, 4, 7, 6}, kArmS16x2Reverse}, | |
2598 }; | |
2599 | |
2600 static const ShuffleEntry<16> arch_s8x16_shuffles[] = { | |
2601 {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}, | |
2602 kArmS8x16ZipLeft}, | |
2603 {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}, | |
2604 kArmS8x16ZipRight}, | |
2605 {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}, | |
2606 kArmS8x16UnzipLeft}, | |
2607 {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}, | |
2608 kArmS8x16UnzipRight}, | |
2609 {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}, | |
2610 kArmS8x16TransposeLeft}, | |
2611 {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}, | |
2612 kArmS8x16TransposeRight}, | |
2613 {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}, kArmS8x8Reverse}, | |
2614 {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, kArmS8x4Reverse}, | |
2615 {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, kArmS8x2Reverse}, | |
2616 }; | |
2617 | |
2618 // Use a non-shuffle opcode to signal no match. | |
2619 static const ArchOpcode kNoShuffle = kArmS128Not; | |
2620 | |
2621 template <int LANES> | |
2622 ArchOpcode TryMatchArchShuffle(const uint8_t* shuffle, | |
2623 const ShuffleEntry<LANES>* table, | |
2624 size_t num_entries, uint8_t mask) { | |
2625 for (size_t i = 0; i < num_entries; i++) { | |
2626 const ShuffleEntry<LANES>& entry = table[i]; | |
2627 int j = 0; | |
2628 for (; j < LANES; j++) { | |
2629 if ((entry.shuffle[j] & mask) != (shuffle[j] & mask)) { | |
2630 break; | |
2631 } | |
2632 } | |
2633 if (j == LANES) return entry.opcode; | |
2601 } | 2634 } |
2602 SIMD_SHUFFLE_OP_LIST(SIMD_VISIT_SHUFFLE_OP) | 2635 return kNoShuffle; |
2603 #undef SIMD_VISIT_SHUFFLE_OP | 2636 } |
2604 | 2637 |
2605 void InstructionSelector::VisitS8x16Concat(Node* node) { | 2638 // Returns the bias if shuffle is a concatenation, 0 otherwise. |
2639 template <int LANES> | |
2640 uint8_t TryMatchConcat(const uint8_t* shuffle, uint8_t mask) { | |
2641 uint8_t start = shuffle[0]; | |
2642 int i = 1; | |
2643 for (; i < LANES - start; i++) { | |
2644 if ((shuffle[i] & mask) != ((shuffle[i - 1] + 1) & mask)) return 0; | |
2645 } | |
2646 uint8_t wrap = LANES; | |
2647 for (; i < LANES; i++, wrap++) { | |
2648 if ((shuffle[i] & mask) != (wrap & mask)) return 0; | |
2649 } | |
2650 return start; | |
2651 } | |
2652 | |
2653 // Canonicalize shuffles to make pattern matching simpler. Returns a mask that | |
2654 // will ignore the high bit of indices in some cases. | |
2655 uint8_t CanonicalizeShuffle(InstructionSelector* selector, Node* node, | |
2656 int num_lanes) { | |
2657 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2658 uint8_t mask = 0xff; | |
2659 // Determine if the shuffle is unary (taking lanes from only one source). | |
2660 bool src0 = false; | |
martyn.capewell
2017/05/02 14:56:56
This would be clearer named src0_is_unary, etc.
bbudge
2017/05/02 22:59:27
How about src0_is_used? Done.
| |
2661 bool src1 = false; | |
2662 for (int i = 0; i < num_lanes; i++) { | |
2663 if (shuffle[i] < num_lanes) { | |
2664 src0 = true; | |
2665 } else { | |
2666 src1 = true; | |
2667 } | |
2668 } | |
2669 // If shuffle is unary, replace the unused source with the other, and set the | |
2670 // mask to expand shuffle matching to ignore the high bit of the indices. | |
2671 if (src0 && !src1) { | |
2672 node->ReplaceInput(1, node->InputAt(0)); | |
2673 mask = num_lanes - 1; | |
2674 } else if (src1 && !src0) { | |
2675 node->ReplaceInput(0, node->InputAt(1)); | |
2676 mask = num_lanes - 1; | |
2677 } else if (selector->GetVirtualRegister(node->InputAt(0)) == | |
2678 selector->GetVirtualRegister(node->InputAt(1))) { | |
martyn.capewell
2017/05/02 14:56:56
This case could be moved earlier, avoiding the shuffle scan.
bbudge
2017/05/02 22:59:27
Done.
| |
2679 mask = num_lanes - 1; | |
2680 } | |
2681 return mask; | |
2682 } | |
2683 | |
2684 } // namespace | |
2685 | |
2686 void InstructionSelector::VisitS32x4Shuffle(Node* node) { | |
2687 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2688 uint8_t mask = CanonicalizeShuffle(this, node, 4); | |
2689 ArchOpcode opcode = TryMatchArchShuffle<4>( | |
2690 shuffle, arch_s32x4_shuffles, arraysize(arch_s32x4_shuffles), mask); | |
2691 if (opcode != kNoShuffle) { | |
2692 VisitRRRShuffle(this, opcode, node); | |
2693 return; | |
2694 } | |
2606 ArmOperandGenerator g(this); | 2695 ArmOperandGenerator g(this); |
2607 int32_t imm = OpParameter<int32_t>(node); | 2696 uint8_t lanes = TryMatchConcat<4>(shuffle, mask); |
2608 Emit(kArmS8x16Concat, g.DefineAsRegister(node), | 2697 if (lanes != 0) { |
2609 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | 2698 Emit(kArmS8x16Concat, g.DefineAsRegister(node), |
2610 g.UseImmediate(imm)); | 2699 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), |
2700 g.UseImmediate(lanes * 4)); | |
2701 return; | |
2702 } | |
2703 // TODO(bbudge) vtbl to handle all other shuffles. | |
2704 } | |
2705 | |
2706 void InstructionSelector::VisitS16x8Shuffle(Node* node) { | |
2707 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2708 uint8_t mask = CanonicalizeShuffle(this, node, 8); | |
2709 ArchOpcode opcode = TryMatchArchShuffle<8>( | |
2710 shuffle, arch_s16x8_shuffles, arraysize(arch_s16x8_shuffles), mask); | |
2711 if (opcode != kNoShuffle) { | |
2712 VisitRRRShuffle(this, opcode, node); | |
2713 return; | |
2714 } | |
2715 ArmOperandGenerator g(this); | |
2716 uint8_t lanes = TryMatchConcat<8>(shuffle, mask); | |
2717 if (lanes != 0) { | |
2718 Emit(kArmS8x16Concat, g.DefineAsRegister(node), | |
2719 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | |
2720 g.UseImmediate(lanes * 2)); | |
2721 } | |
2722 // TODO(bbudge) vtbl to handle all other shuffles. | |
2723 } | |
2724 | |
2725 void InstructionSelector::VisitS8x16Shuffle(Node* node) { | |
2726 const uint8_t* shuffle = OpParameter<uint8_t*>(node); | |
2727 uint8_t mask = CanonicalizeShuffle(this, node, 16); | |
2728 ArchOpcode opcode = TryMatchArchShuffle<16>( | |
2729 shuffle, arch_s8x16_shuffles, arraysize(arch_s8x16_shuffles), mask); | |
2730 if (opcode != kNoShuffle) { | |
2731 VisitRRRShuffle(this, opcode, node); | |
2732 return; | |
2733 } | |
2734 ArmOperandGenerator g(this); | |
2735 uint8_t lanes = TryMatchConcat<16>(shuffle, mask); | |
2736 if (lanes != 0) { | |
2737 Emit(kArmS8x16Concat, g.DefineAsRegister(node), | |
2738 g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)), | |
2739 g.UseImmediate(lanes)); | |
2740 } | |
2741 // TODO(bbudge) vtbl to handle all other shuffles. | |
2611 } | 2742 } |
2612 | 2743 |
2613 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { | 2744 void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { |
2614 UNREACHABLE(); | 2745 UNREACHABLE(); |
2615 } | 2746 } |
2616 | 2747 |
2617 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { | 2748 void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { |
2618 UNREACHABLE(); | 2749 UNREACHABLE(); |
2619 } | 2750 } |
2620 | 2751 |
(...skipping 30 matching lines...) Expand all Loading... | |
2651 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); | 2782 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); |
2652 req_aligned[0] = MachineType::Float32(); | 2783 req_aligned[0] = MachineType::Float32(); |
2653 req_aligned[1] = MachineType::Float64(); | 2784 req_aligned[1] = MachineType::Float64(); |
2654 return MachineOperatorBuilder::AlignmentRequirements:: | 2785 return MachineOperatorBuilder::AlignmentRequirements:: |
2655 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); | 2786 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); |
2656 } | 2787 } |
2657 | 2788 |
2658 } // namespace compiler | 2789 } // namespace compiler |
2659 } // namespace internal | 2790 } // namespace internal |
2660 } // namespace v8 | 2791 } // namespace v8 |
OLD | NEW |