Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/base/adapters.h" 5 #include "src/base/adapters.h"
6 #include "src/base/bits.h" 6 #include "src/base/bits.h"
7 #include "src/compiler/instruction-selector-impl.h" 7 #include "src/compiler/instruction-selector-impl.h"
8 #include "src/compiler/node-matchers.h" 8 #include "src/compiler/node-matchers.h"
9 #include "src/compiler/node-properties.h" 9 #include "src/compiler/node-properties.h"
10 10
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
553 // support unaligned access. We support unaligned FP loads by loading to 553 // support unaligned access. We support unaligned FP loads by loading to
554 // integer registers first, then moving to the destination FP register. 554 // integer registers first, then moving to the destination FP register.
555 switch (load_rep.representation()) { 555 switch (load_rep.representation()) {
556 case MachineRepresentation::kFloat32: { 556 case MachineRepresentation::kFloat32: {
557 InstructionOperand temp = g.TempRegister(); 557 InstructionOperand temp = g.TempRegister();
558 EmitLoad(this, opcode, &temp, base, index); 558 EmitLoad(this, opcode, &temp, base, index);
559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); 559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
560 return; 560 return;
561 } 561 }
562 case MachineRepresentation::kFloat64: { 562 case MachineRepresentation::kFloat64: {
563 // TODO(arm): use vld1.8 for this when NEON is available. 563 if (CpuFeatures::IsSupported(NEON)) {
564 // Compute the address of the least-significant half of the FP value. 564 InstructionOperand output = g.DefineAsRegister(node);
565 // We assume that the base node is unlikely to be an encodable immediate 565 EmitLoad(this, kArmVld1F64, &output, base, index);
566 // or the result of a shift operation, so only consider the addressing 566 } else {
567 // mode that should be used for the index node. 567 // Compute the address of the least-significant half of the FP value.
568 InstructionCode add_opcode = kArmAdd; 568 // We assume that the base node is unlikely to be an encodable immediate
569 InstructionOperand inputs[3]; 569 // or the result of a shift operation, so only consider the addressing
570 inputs[0] = g.UseRegister(base); 570 // mode that should be used for the index node.
571 InstructionCode add_opcode = kArmAdd;
572 InstructionOperand inputs[3];
573 inputs[0] = g.UseRegister(base);
571 574
572 size_t input_count; 575 size_t input_count;
573 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count, 576 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
574 &inputs[1])) { 577 &inputs[1])) {
575 // input_count has been set by TryMatchImmediateOrShift(), so increment 578 // input_count has been set by TryMatchImmediateOrShift(), so
576 // it to account for the base register in inputs[0]. 579 // increment it to account for the base register in inputs[0].
577 input_count++; 580 input_count++;
578 } else { 581 } else {
579 add_opcode |= AddressingModeField::encode(kMode_Operand2_R); 582 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
580 inputs[1] = g.UseRegister(index); 583 inputs[1] = g.UseRegister(index);
581 input_count = 2; // Base register and index. 584 input_count = 2; // Base register and index.
585 }
586
587 InstructionOperand addr = g.TempRegister();
588 Emit(add_opcode, 1, &addr, input_count, inputs);
589
590 // Load both halves and move to an FP register.
591 InstructionOperand fp_lo = g.TempRegister();
592 InstructionOperand fp_hi = g.TempRegister();
593 opcode |= AddressingModeField::encode(kMode_Offset_RI);
594 Emit(opcode, fp_lo, addr, g.TempImmediate(0));
595 Emit(opcode, fp_hi, addr, g.TempImmediate(4));
596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
582 } 597 }
583
584 InstructionOperand addr = g.TempRegister();
585 Emit(add_opcode, 1, &addr, input_count, inputs);
586
587 // Load both halves and move to an FP register.
588 InstructionOperand fp_lo = g.TempRegister();
589 InstructionOperand fp_hi = g.TempRegister();
590 opcode |= AddressingModeField::encode(kMode_Offset_RI);
591 Emit(opcode, fp_lo, addr, g.TempImmediate(0));
592 Emit(opcode, fp_hi, addr, g.TempImmediate(4));
593 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
594 return; 598 return;
595 } 599 }
596 default: 600 default:
597 // All other cases should support unaligned accesses. 601 // All other cases should support unaligned accesses.
598 UNREACHABLE(); 602 UNREACHABLE();
599 return; 603 return;
600 } 604 }
601 } 605 }
602 606
603 void InstructionSelector::VisitUnalignedStore(Node* node) { 607 void InstructionSelector::VisitUnalignedStore(Node* node) {
(...skipping 13 matching lines...) Expand all
617 // value to integer registers first, then storing to the destination address. 621 // value to integer registers first, then storing to the destination address.
618 switch (store_rep) { 622 switch (store_rep) {
619 case MachineRepresentation::kFloat32: { 623 case MachineRepresentation::kFloat32: {
620 inputs[input_count++] = g.TempRegister(); 624 inputs[input_count++] = g.TempRegister();
621 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value)); 625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));
622 inputs[input_count++] = g.UseRegister(base); 626 inputs[input_count++] = g.UseRegister(base);
623 EmitStore(this, kArmStr, input_count, inputs, index); 627 EmitStore(this, kArmStr, input_count, inputs, index);
624 return; 628 return;
625 } 629 }
626 case MachineRepresentation::kFloat64: { 630 case MachineRepresentation::kFloat64: {
627 // TODO(arm): use vst1.8 for this when NEON is available. 631 if (CpuFeatures::IsSupported(NEON)) {
628 // Store a 64-bit floating point value using two 32-bit integer stores. 632 inputs[input_count++] = g.UseRegister(value);
629 // Computing the store address here would require three live temporary 633 inputs[input_count++] = g.UseRegister(base);
630 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after 634 EmitStore(this, kArmVst1F64, input_count, inputs, index);
631 // storing the least-significant half of the value. 635 } else {
636 // Store a 64-bit floating point value using two 32-bit integer stores.
637 // Computing the store address here would require three live temporary
638 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
639 // storing the least-significant half of the value.
632 640
633 // First, move the 64-bit FP value into two temporary integer registers. 641 // First, move the 64-bit FP value into two temporary integer registers.
634 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()}; 642 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
635 inputs[input_count++] = g.UseRegister(value); 643 inputs[input_count++] = g.UseRegister(value);
636 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, 644 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);
637 inputs);
638 645
639 // Store the least-significant half. 646 // Store the least-significant half.
640 inputs[0] = fp[0]; // Low 32-bits of FP value. 647 inputs[0] = fp[0]; // Low 32-bits of FP value.
641 inputs[input_count++] = g.UseRegister(base); // First store base address. 648 inputs[input_count++] =
642 EmitStore(this, kArmStr, input_count, inputs, index); 649 g.UseRegister(base); // First store base address.
650 EmitStore(this, kArmStr, input_count, inputs, index);
643 651
644 // Store the most-significant half. 652 // Store the most-significant half.
645 InstructionOperand base4 = g.TempRegister(); 653 InstructionOperand base4 = g.TempRegister();
646 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4, 654 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,
647 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4. 655 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.
648 inputs[0] = fp[1]; // High 32-bits of FP value. 656 inputs[0] = fp[1]; // High 32-bits of FP value.
649 inputs[1] = base4; // Second store base + 4 address. 657 inputs[1] = base4; // Second store base + 4 address.
650 EmitStore(this, kArmStr, input_count, inputs, index); 658 EmitStore(this, kArmStr, input_count, inputs, index);
659 }
651 return; 660 return;
652 } 661 }
653 default: 662 default:
654 // All other cases should support unaligned accesses. 663 // All other cases should support unaligned accesses.
655 UNREACHABLE(); 664 UNREACHABLE();
656 return; 665 return;
657 } 666 }
658 } 667 }
659 668
660 void InstructionSelector::VisitCheckedLoad(Node* node) { 669 void InstructionSelector::VisitCheckedLoad(Node* node) {
(...skipping 1812 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); 2482 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);
2474 req_aligned[0] = MachineType::Float32(); 2483 req_aligned[0] = MachineType::Float32();
2475 req_aligned[1] = MachineType::Float64(); 2484 req_aligned[1] = MachineType::Float64();
2476 return MachineOperatorBuilder::AlignmentRequirements:: 2485 return MachineOperatorBuilder::AlignmentRequirements::
2477 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); 2486 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
2478 } 2487 }
2479 2488
2480 } // namespace compiler 2489 } // namespace compiler
2481 } // namespace internal 2490 } // namespace internal
2482 } // namespace v8 2491 } // namespace v8
OLD | NEW
« src/compiler/arm/code-generator-arm.cc ('K') | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698