Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(416)

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/base/adapters.h" 5 #include "src/base/adapters.h"
6 #include "src/base/bits.h" 6 #include "src/base/bits.h"
7 #include "src/compiler/instruction-selector-impl.h" 7 #include "src/compiler/instruction-selector-impl.h"
8 #include "src/compiler/node-matchers.h" 8 #include "src/compiler/node-matchers.h"
9 #include "src/compiler/node-properties.h" 9 #include "src/compiler/node-properties.h"
10 10
(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after
420 case MachineRepresentation::kWord16: 420 case MachineRepresentation::kWord16:
421 opcode = load_rep.IsUnsigned() ? kArmLdrh : kArmLdrsh; 421 opcode = load_rep.IsUnsigned() ? kArmLdrh : kArmLdrsh;
422 break; 422 break;
423 case MachineRepresentation::kTaggedSigned: // Fall through. 423 case MachineRepresentation::kTaggedSigned: // Fall through.
424 case MachineRepresentation::kTaggedPointer: // Fall through. 424 case MachineRepresentation::kTaggedPointer: // Fall through.
425 case MachineRepresentation::kTagged: // Fall through. 425 case MachineRepresentation::kTagged: // Fall through.
426 case MachineRepresentation::kWord32: 426 case MachineRepresentation::kWord32:
427 opcode = kArmLdr; 427 opcode = kArmLdr;
428 break; 428 break;
429 case MachineRepresentation::kSimd128: 429 case MachineRepresentation::kSimd128:
430 opcode = kArmSimd128Load; 430 opcode = kArmVld1S128;
431 break; 431 break;
432 case MachineRepresentation::kWord64: // Fall through. 432 case MachineRepresentation::kWord64: // Fall through.
433 case MachineRepresentation::kSimd1x4: // Fall through. 433 case MachineRepresentation::kSimd1x4: // Fall through.
434 case MachineRepresentation::kSimd1x8: // Fall through. 434 case MachineRepresentation::kSimd1x8: // Fall through.
435 case MachineRepresentation::kSimd1x16: // Fall through. 435 case MachineRepresentation::kSimd1x16: // Fall through.
436 case MachineRepresentation::kNone: 436 case MachineRepresentation::kNone:
437 UNREACHABLE(); 437 UNREACHABLE();
438 return; 438 return;
439 } 439 }
440 440
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 case MachineRepresentation::kWord16: 510 case MachineRepresentation::kWord16:
511 opcode = kArmStrh; 511 opcode = kArmStrh;
512 break; 512 break;
513 case MachineRepresentation::kTaggedSigned: // Fall through. 513 case MachineRepresentation::kTaggedSigned: // Fall through.
514 case MachineRepresentation::kTaggedPointer: // Fall through. 514 case MachineRepresentation::kTaggedPointer: // Fall through.
515 case MachineRepresentation::kTagged: // Fall through. 515 case MachineRepresentation::kTagged: // Fall through.
516 case MachineRepresentation::kWord32: 516 case MachineRepresentation::kWord32:
517 opcode = kArmStr; 517 opcode = kArmStr;
518 break; 518 break;
519 case MachineRepresentation::kSimd128: 519 case MachineRepresentation::kSimd128:
520 opcode = kArmSimd128Store; 520 opcode = kArmVst1S128;
521 break; 521 break;
522 case MachineRepresentation::kWord64: // Fall through. 522 case MachineRepresentation::kWord64: // Fall through.
523 case MachineRepresentation::kSimd1x4: // Fall through. 523 case MachineRepresentation::kSimd1x4: // Fall through.
524 case MachineRepresentation::kSimd1x8: // Fall through. 524 case MachineRepresentation::kSimd1x8: // Fall through.
525 case MachineRepresentation::kSimd1x16: // Fall through. 525 case MachineRepresentation::kSimd1x16: // Fall through.
526 case MachineRepresentation::kNone: 526 case MachineRepresentation::kNone:
527 UNREACHABLE(); 527 UNREACHABLE();
528 return; 528 return;
529 } 529 }
530 530
531 InstructionOperand inputs[4]; 531 InstructionOperand inputs[4];
532 size_t input_count = 0; 532 size_t input_count = 0;
533 inputs[input_count++] = g.UseRegister(value); 533 inputs[input_count++] = g.UseRegister(value);
534 inputs[input_count++] = g.UseRegister(base); 534 inputs[input_count++] = g.UseRegister(base);
535 EmitStore(this, opcode, input_count, inputs, index); 535 EmitStore(this, opcode, input_count, inputs, index);
536 } 536 }
537 } 537 }
538 538
539 void InstructionSelector::VisitProtectedStore(Node* node) { 539 void InstructionSelector::VisitProtectedStore(Node* node) {
540 // TODO(eholk) 540 // TODO(eholk)
541 UNIMPLEMENTED(); 541 UNIMPLEMENTED();
542 } 542 }
543 543
544 void InstructionSelector::VisitUnalignedLoad(Node* node) { 544 void InstructionSelector::VisitUnalignedLoad(Node* node) {
545 UnalignedLoadRepresentation load_rep = 545 MachineRepresentation load_rep =
546 UnalignedLoadRepresentationOf(node->op()); 546 UnalignedLoadRepresentationOf(node->op()).representation();
547 ArmOperandGenerator g(this); 547 ArmOperandGenerator g(this);
548 Node* base = node->InputAt(0); 548 Node* base = node->InputAt(0);
549 Node* index = node->InputAt(1); 549 Node* index = node->InputAt(1);
550 550
551 InstructionCode opcode = kArmLdr; 551 InstructionCode opcode = kArmLdr;
552 // Only floating point loads need to be specially handled; integer loads 552 // Only floating point loads need to be specially handled; integer loads
553 // support unaligned access. We support unaligned FP loads by loading to 553 // support unaligned access. We support unaligned FP loads by loading to
554 // integer registers first, then moving to the destination FP register. 554 // integer registers first, then moving to the destination FP register. If
555 switch (load_rep.representation()) { 555 // NEON is supported, we use the vld1.8 instruction.
556 switch (load_rep) {
556 case MachineRepresentation::kFloat32: { 557 case MachineRepresentation::kFloat32: {
557 InstructionOperand temp = g.TempRegister(); 558 InstructionOperand temp = g.TempRegister();
558 EmitLoad(this, opcode, &temp, base, index); 559 EmitLoad(this, opcode, &temp, base, index);
559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); 560 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
560 return; 561 return;
561 } 562 }
562 case MachineRepresentation::kFloat64: { 563 case MachineRepresentation::kFloat64:
563 // Compute the address of the least-significant half of the FP value. 564 case MachineRepresentation::kSimd128: {
565 // Compute the address of the least-significant byte of the FP value.
564 // We assume that the base node is unlikely to be an encodable immediate 566 // We assume that the base node is unlikely to be an encodable immediate
565 // or the result of a shift operation, so only consider the addressing 567 // or the result of a shift operation, so only consider the addressing
566 // mode that should be used for the index node. 568 // mode that should be used for the index node.
567 InstructionCode add_opcode = kArmAdd; 569 InstructionCode add_opcode = kArmAdd;
568 InstructionOperand inputs[3]; 570 InstructionOperand inputs[3];
569 inputs[0] = g.UseRegister(base); 571 inputs[0] = g.UseRegister(base);
570 572
571 size_t input_count; 573 size_t input_count;
572 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count, 574 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
573 &inputs[1])) { 575 &inputs[1])) {
574 // input_count has been set by TryMatchImmediateOrShift(), so 576 // input_count has been set by TryMatchImmediateOrShift(), so
575 // increment it to account for the base register in inputs[0]. 577 // increment it to account for the base register in inputs[0].
576 input_count++; 578 input_count++;
577 } else { 579 } else {
578 add_opcode |= AddressingModeField::encode(kMode_Operand2_R); 580 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
579 inputs[1] = g.UseRegister(index); 581 inputs[1] = g.UseRegister(index);
580 input_count = 2; // Base register and index. 582 input_count = 2; // Base register and index.
581 } 583 }
582 584
583 InstructionOperand addr = g.TempRegister(); 585 InstructionOperand addr = g.TempRegister();
584 Emit(add_opcode, 1, &addr, input_count, inputs); 586 Emit(add_opcode, 1, &addr, input_count, inputs);
585 587
586 if (CpuFeatures::IsSupported(NEON)) { 588 if (CpuFeatures::IsSupported(NEON)) {
587 // With NEON we can load directly from the calculated address. 589 // With NEON we can load directly from the calculated address.
588 Emit(kArmVld1F64, g.DefineAsRegister(node), addr); 590 ArchOpcode op = load_rep == MachineRepresentation::kFloat64
591 ? kArmVld1F64
592 : kArmVld1S128;
593 Emit(op, g.DefineAsRegister(node), addr);
589 } else { 594 } else {
595 DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
590 // Load both halves and move to an FP register. 596 // Load both halves and move to an FP register.
591 InstructionOperand fp_lo = g.TempRegister(); 597 InstructionOperand fp_lo = g.TempRegister();
592 InstructionOperand fp_hi = g.TempRegister(); 598 InstructionOperand fp_hi = g.TempRegister();
593 opcode |= AddressingModeField::encode(kMode_Offset_RI); 599 opcode |= AddressingModeField::encode(kMode_Offset_RI);
594 Emit(opcode, fp_lo, addr, g.TempImmediate(0)); 600 Emit(opcode, fp_lo, addr, g.TempImmediate(0));
595 Emit(opcode, fp_hi, addr, g.TempImmediate(4)); 601 Emit(opcode, fp_hi, addr, g.TempImmediate(4));
596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi); 602 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
597 } 603 }
598 return; 604 return;
599 } 605 }
(...skipping 12 matching lines...) Expand all
612 618
613 InstructionOperand inputs[4]; 619 InstructionOperand inputs[4];
614 size_t input_count = 0; 620 size_t input_count = 0;
615 621
616 UnalignedStoreRepresentation store_rep = 622 UnalignedStoreRepresentation store_rep =
617 UnalignedStoreRepresentationOf(node->op()); 623 UnalignedStoreRepresentationOf(node->op());
618 624
619 // Only floating point stores need to be specially handled; integer stores 625 // Only floating point stores need to be specially handled; integer stores
620 // support unaligned access. We support unaligned FP stores by moving the 626 // support unaligned access. We support unaligned FP stores by moving the
621 // value to integer registers first, then storing to the destination address. 627 // value to integer registers first, then storing to the destination address.
628 // If NEON is supported, we use the vst1.8 instruction.
622 switch (store_rep) { 629 switch (store_rep) {
623 case MachineRepresentation::kFloat32: { 630 case MachineRepresentation::kFloat32: {
624 inputs[input_count++] = g.TempRegister(); 631 inputs[input_count++] = g.TempRegister();
625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value)); 632 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));
626 inputs[input_count++] = g.UseRegister(base); 633 inputs[input_count++] = g.UseRegister(base);
627 EmitStore(this, kArmStr, input_count, inputs, index); 634 EmitStore(this, kArmStr, input_count, inputs, index);
628 return; 635 return;
629 } 636 }
630 case MachineRepresentation::kFloat64: { 637 case MachineRepresentation::kFloat64:
638 case MachineRepresentation::kSimd128: {
631 if (CpuFeatures::IsSupported(NEON)) { 639 if (CpuFeatures::IsSupported(NEON)) {
632 InstructionOperand address = g.TempRegister(); 640 InstructionOperand address = g.TempRegister();
633 { 641 {
634 // First we have to calculate the actual address. 642 // First we have to calculate the actual address.
635 InstructionCode add_opcode = kArmAdd; 643 InstructionCode add_opcode = kArmAdd;
636 InstructionOperand inputs[3]; 644 InstructionOperand inputs[3];
637 inputs[0] = g.UseRegister(base); 645 inputs[0] = g.UseRegister(base);
638 646
639 size_t input_count; 647 size_t input_count;
640 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count, 648 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
641 &inputs[1])) { 649 &inputs[1])) {
642 // input_count has been set by TryMatchImmediateOrShift(), so 650 // input_count has been set by TryMatchImmediateOrShift(), so
643 // increment it to account for the base register in inputs[0]. 651 // increment it to account for the base register in inputs[0].
644 input_count++; 652 input_count++;
645 } else { 653 } else {
646 add_opcode |= AddressingModeField::encode(kMode_Operand2_R); 654 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
647 inputs[1] = g.UseRegister(index); 655 inputs[1] = g.UseRegister(index);
648 input_count = 2; // Base register and index. 656 input_count = 2; // Base register and index.
649 } 657 }
650 658
651 Emit(add_opcode, 1, &address, input_count, inputs); 659 Emit(add_opcode, 1, &address, input_count, inputs);
652 } 660 }
653 661
654 inputs[input_count++] = g.UseRegister(value); 662 inputs[input_count++] = g.UseRegister(value);
655 inputs[input_count++] = address; 663 inputs[input_count++] = address;
656 Emit(kArmVst1F64, 0, nullptr, input_count, inputs); 664 ArchOpcode op = store_rep == MachineRepresentation::kFloat64
665 ? kArmVst1F64
666 : kArmVst1S128;
667 Emit(op, 0, nullptr, input_count, inputs);
657 } else { 668 } else {
669 DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
658 // Store a 64-bit floating point value using two 32-bit integer stores. 670 // Store a 64-bit floating point value using two 32-bit integer stores.
659 // Computing the store address here would require three live temporary 671 // Computing the store address here would require three live temporary
660 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after 672 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
661 // storing the least-significant half of the value. 673 // storing the least-significant half of the value.
662 674
663 // First, move the 64-bit FP value into two temporary integer registers. 675 // First, move the 64-bit FP value into two temporary integer registers.
664 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()}; 676 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
665 inputs[input_count++] = g.UseRegister(value); 677 inputs[input_count++] = g.UseRegister(value);
666 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs); 678 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);
667 679
(...skipping 1836 matching lines...) Expand 10 before | Expand all | Expand 10 after
2504 Vector<MachineType> req_aligned = Vector<MachineType>::New(2); 2516 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);
2505 req_aligned[0] = MachineType::Float32(); 2517 req_aligned[0] = MachineType::Float32();
2506 req_aligned[1] = MachineType::Float64(); 2518 req_aligned[1] = MachineType::Float64();
2507 return MachineOperatorBuilder::AlignmentRequirements:: 2519 return MachineOperatorBuilder::AlignmentRequirements::
2508 SomeUnalignedAccessUnsupported(req_aligned, req_aligned); 2520 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);
2509 } 2521 }
2510 2522
2511 } // namespace compiler 2523 } // namespace compiler
2512 } // namespace internal 2524 } // namespace internal
2513 } // namespace v8 2525 } // namespace v8
OLDNEW
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698