Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1316)

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses (Closed)
Patch Set: Calculate the address separately Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/base/adapters.h" 5 #include "src/base/adapters.h"
6 #include "src/base/bits.h" 6 #include "src/base/bits.h"
7 #include "src/compiler/instruction-selector-impl.h" 7 #include "src/compiler/instruction-selector-impl.h"
8 #include "src/compiler/node-matchers.h" 8 #include "src/compiler/node-matchers.h"
9 #include "src/compiler/node-properties.h" 9 #include "src/compiler/node-properties.h"
10 10
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
553 // support unaligned access. We support unaligned FP loads by loading to 553 // support unaligned access. We support unaligned FP loads by loading to
554 // integer registers first, then moving to the destination FP register. 554 // integer registers first, then moving to the destination FP register.
555 switch (load_rep.representation()) { 555 switch (load_rep.representation()) {
556 case MachineRepresentation::kFloat32: { 556 case MachineRepresentation::kFloat32: {
557 InstructionOperand temp = g.TempRegister(); 557 InstructionOperand temp = g.TempRegister();
558 EmitLoad(this, opcode, &temp, base, index); 558 EmitLoad(this, opcode, &temp, base, index);
559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); 559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
560 return; 560 return;
561 } 561 }
562 case MachineRepresentation::kFloat64: { 562 case MachineRepresentation::kFloat64: {
563 // TODO(arm): use vld1.8 for this when NEON is available.
564 // Compute the address of the least-significant half of the FP value. 563 // Compute the address of the least-significant half of the FP value.
565 // We assume that the base node is unlikely to be an encodable immediate 564 // We assume that the base node is unlikely to be an encodable immediate
566 // or the result of a shift operation, so only consider the addressing 565 // or the result of a shift operation, so only consider the addressing
567 // mode that should be used for the index node. 566 // mode that should be used for the index node.
568 InstructionCode add_opcode = kArmAdd; 567 InstructionCode add_opcode = kArmAdd;
569 InstructionOperand inputs[3]; 568 InstructionOperand inputs[3];
570 inputs[0] = g.UseRegister(base); 569 inputs[0] = g.UseRegister(base);
571 570
572 size_t input_count; 571 size_t input_count;
573 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count, 572 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
574 &inputs[1])) { 573 &inputs[1])) {
575 // input_count has been set by TryMatchImmediateOrShift(), so increment 574 // input_count has been set by TryMatchImmediateOrShift(), so
576 // it to account for the base register in inputs[0]. 575 // increment it to account for the base register in inputs[0].
577 input_count++; 576 input_count++;
578 } else { 577 } else {
579 add_opcode |= AddressingModeField::encode(kMode_Operand2_R); 578 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
580 inputs[1] = g.UseRegister(index); 579 inputs[1] = g.UseRegister(index);
581 input_count = 2; // Base register and index. 580 input_count = 2; // Base register and index.
582 } 581 }
583 582
584 InstructionOperand addr = g.TempRegister(); 583 InstructionOperand addr = g.TempRegister();
585 Emit(add_opcode, 1, &addr, input_count, inputs); 584 Emit(add_opcode, 1, &addr, input_count, inputs);
586 585
587 // Load both halves and move to an FP register. 586 if (CpuFeatures::IsSupported(NEON)) {
588 InstructionOperand fp_lo = g.TempRegister(); 587 // With NEON we can load directly from the calculated address.
589 InstructionOperand fp_hi = g.TempRegister(); 588 Emit(kArmVld1F64, g.DefineAsRegister(node), addr);
590 opcode |= AddressingModeField::encode(kMode_Offset_RI); 589 } else {
591 Emit(opcode, fp_lo, addr, g.TempImmediate(0)); 590 // Load both halves and move to an FP register.
592 Emit(opcode, fp_hi, addr, g.TempImmediate(4)); 591 InstructionOperand fp_lo = g.TempRegister();
593 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi); 592 InstructionOperand fp_hi = g.TempRegister();
593 opcode |= AddressingModeField::encode(kMode_Offset_RI);
594 Emit(opcode, fp_lo, addr, g.TempImmediate(0));
595 Emit(opcode, fp_hi, addr, g.TempImmediate(4));
596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
597 }
594 return; 598 return;
595 } 599 }
596 default: 600 default:
597 // All other cases should support unaligned accesses. 601 // All other cases should support unaligned accesses.
598 UNREACHABLE(); 602 UNREACHABLE();
599 return; 603 return;
600 } 604 }
601 } 605 }
602 606
603 void InstructionSelector::VisitUnalignedStore(Node* node) { 607 void InstructionSelector::VisitUnalignedStore(Node* node) {
(...skipping 13 matching lines...) Expand all
617 // value to integer registers first, then storing to the destination address. 621 // value to integer registers first, then storing to the destination address.
618 switch (store_rep) { 622 switch (store_rep) {
619 case MachineRepresentation::kFloat32: { 623 case MachineRepresentation::kFloat32: {
620 inputs[input_count++] = g.TempRegister(); 624 inputs[input_count++] = g.TempRegister();
621 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value)); 625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));
622 inputs[input_count++] = g.UseRegister(base); 626 inputs[input_count++] = g.UseRegister(base);
623 EmitStore(this, kArmStr, input_count, inputs, index); 627 EmitStore(this, kArmStr, input_count, inputs, index);
624 return; 628 return;
625 } 629 }
626 case MachineRepresentation::kFloat64: { 630 case MachineRepresentation::kFloat64: {
627 // TODO(arm): use vst1.8 for this when NEON is available. 631 if (CpuFeatures::IsSupported(NEON)) {
628 // Store a 64-bit floating point value using two 32-bit integer stores. 632 InstructionOperand address = g.TempRegister();
629 // Computing the store address here would require three live temporary 633 {
630 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after 634 // First we have to calculate the actual address.
631 // storing the least-significant half of the value. 635 InstructionCode add_opcode = kArmAdd;
636 InstructionOperand inputs[3];
637 inputs[0] = g.UseRegister(base);
632 638
633 // First, move the 64-bit FP value into two temporary integer registers. 639 size_t input_count;
634 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()}; 640 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
635 inputs[input_count++] = g.UseRegister(value); 641 &inputs[1])) {
636 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, 642 // input_count has been set by TryMatchImmediateOrShift(), so
637 inputs); 643 // increment it to account for the base register in inputs[0].
644 input_count++;
645 } else {
646 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
647 inputs[1] = g.UseRegister(index);
648 input_count = 2; // Base register and index.
649 }
638 650
639 // Store the least-significant half. 651 Emit(add_opcode, 1, &address, input_count, inputs);
640 inputs[0] = fp[0]; // Low 32-bits of FP value. 652 }
641 inputs[input_count++] = g.UseRegister(base); // First store base address.
642 EmitStore(this, kArmStr, input_count, inputs, index);
643 653
644 // Store the most-significant half. 654 inputs[input_count++] = g.UseRegister(value);
645 InstructionOperand base4 = g.TempRegister(); 655 inputs[input_count++] = address;
646 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4, 656 Emit(kArmVst1F64, 0, nullptr, input_count, inputs);
647 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4. 657 } else {
648 inputs[0] = fp[1]; // High 32-bits of FP value. 658 // Store a 64-bit floating point value using two 32-bit integer stores.
649 inputs[1] = base4; // Second store base + 4 address. 659 // Computing the store address here would require three live temporary
650 EmitStore(this, kArmStr, input_count, inputs, index); 660 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
661 // storing the least-significant half of the value.
662
663 // First, move the 64-bit FP value into two temporary integer registers.
664 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
665 inputs[input_count++] = g.UseRegister(value);
666 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);
667
668 // Store the least-significant half.
669 inputs[0] = fp[0]; // Low 32-bits of FP value.
670 inputs[input_count++] =
671 g.UseRegister(base); // First store base address.
672 EmitStore(this, kArmStr, input_count, inputs, index);
673
674 // Store the most-significant half.
675 InstructionOperand base4 = g.TempRegister();
676 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,
677 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.
678 inputs[0] = fp[1]; // High 32-bits of FP value.
679 inputs[1] = base4; // Second store base + 4 address.
680 EmitStore(this, kArmStr, input_count, inputs, index);
681 }
651 return; 682 return;
652 } 683 }
653 default: 684 default:
654 // All other cases should support unaligned accesses. 685 // All other cases should support unaligned accesses.
655 UNREACHABLE(); 686 UNREACHABLE();
656 return; 687 return;
657 } 688 }
658 } 689 }
659 690
660 void InstructionSelector::VisitCheckedLoad(Node* node) { 691 void InstructionSelector::VisitCheckedLoad(Node* node) {
(...skipping 1812 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); 2504 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);
2474 req_aligned[0] = MachineType::Float32(); 2505 req_aligned[0] = MachineType::Float32();
2475 req_aligned[1] = MachineType::Float64(); 2506 req_aligned[1] = MachineType::Float64();
2476 return MachineOperatorBuilder::AlignmentRequirements:: 2507 return MachineOperatorBuilder::AlignmentRequirements::
2477 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); 2508 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
2478 } 2509 }
2479 2510
2480 } // namespace compiler 2511 } // namespace compiler
2481 } // namespace internal 2512 } // namespace internal
2482 } // namespace v8 2513 } // namespace v8
OLD | NEW
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698