src/compiler/arm64/code-generator-arm64.cc - Issue 1360603003: [arm64] Implement Float(32|64)(Min|Max) using fcsel.

Side by Side Diff: src/compiler/arm64/code-generator-arm64.cc

Issue 1360603003: [arm64] Implement Float(32|64)(Min|Max) using fcsel. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 the V8 project authors. All rights reserved.	1 // Copyright 2014 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/compiler/code-generator.h"	5 #include "src/compiler/code-generator.h"

6	6

7 #include "src/arm64/frames-arm64.h"	7 #include "src/arm64/frames-arm64.h"

8 #include "src/arm64/macro-assembler-arm64.h"	8 #include "src/arm64/macro-assembler-arm64.h"

9 #include "src/compiler/code-generator-impl.h"	9 #include "src/compiler/code-generator-impl.h"

10 #include "src/compiler/gap-resolver.h"	10 #include "src/compiler/gap-resolver.h"

(...skipping 367 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
378 if (descriptor->IsJSFunctionCall() \|\| stack_slots > 0) {	378 if (descriptor->IsJSFunctionCall() \|\| stack_slots > 0) {

379 __ Mov(jssp, fp);	379 __ Mov(jssp, fp);

380 __ Pop(fp, lr);	380 __ Pop(fp, lr);

381 }	381 }

382 }	382 }

383	383

384	384

385 // Assembles an instruction after register allocation, producing machine code.	385 // Assembles an instruction after register allocation, producing machine code.

386 void CodeGenerator::AssembleArchInstruction(Instruction* instr) {	386 void CodeGenerator::AssembleArchInstruction(Instruction* instr) {

387 Arm64OperandConverter i(this, instr);	387 Arm64OperandConverter i(this, instr);

388 InstructionCode opcode = instr->opcode();	388 switch (instr->arch_opcode()) {

389 switch (ArchOpcodeField::decode(opcode)) {

390 case kArchCallCodeObject: {	389 case kArchCallCodeObject: {

391 EnsureSpaceForLazyDeopt();	390 EnsureSpaceForLazyDeopt();

392 if (instr->InputAt(0)->IsImmediate()) {	391 if (instr->InputAt(0)->IsImmediate()) {

393 __ Call(Handle<Code>::cast(i.InputHeapObject(0)),	392 __ Call(Handle<Code>::cast(i.InputHeapObject(0)),

394 RelocInfo::CODE_TARGET);	393 RelocInfo::CODE_TARGET);

395 } else {	394 } else {

396 Register target = i.InputRegister(0);	395 Register target = i.InputRegister(0);

397 __ Add(target, target, Code::kHeaderSize - kHeapObjectTag);	396 __ Add(target, target, Code::kHeaderSize - kHeapObjectTag);

398 __ Call(target);	397 __ Call(target);

399 }	398 }

(...skipping 99 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
499 case kArm64Float64RoundTruncate:	498 case kArm64Float64RoundTruncate:

500 __ Frintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));	499 __ Frintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));

501 break;	500 break;

502 case kArm64Float64RoundUp:	501 case kArm64Float64RoundUp:

503 __ Frintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));	502 __ Frintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));

504 break;	503 break;

505 case kArm64Add:	504 case kArm64Add:

506 __ Add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));	505 __ Add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));

507 break;	506 break;

508 case kArm64Add32:	507 case kArm64Add32:

509 if (FlagsModeField::decode(opcode) != kFlags_none) {	508 if (instr->flags_mode() != kFlags_none) {

510 __ Adds(i.OutputRegister32(), i.InputRegister32(0),	509 __ Adds(i.OutputRegister32(), i.InputRegister32(0),

511 i.InputOperand2_32(1));	510 i.InputOperand2_32(1));

512 } else {	511 } else {

513 __ Add(i.OutputRegister32(), i.InputRegister32(0),	512 __ Add(i.OutputRegister32(), i.InputRegister32(0),

514 i.InputOperand2_32(1));	513 i.InputOperand2_32(1));

515 }	514 }

516 break;	515 break;

517 case kArm64And:	516 case kArm64And:

518 __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));	517 __ And(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));

519 break;	518 break;

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
635 case kArm64Eon:	634 case kArm64Eon:

636 __ Eon(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));	635 __ Eon(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));

637 break;	636 break;

638 case kArm64Eon32:	637 case kArm64Eon32:

639 __ Eon(i.OutputRegister32(), i.InputRegister32(0), i.InputOperand2_32(1));	638 __ Eon(i.OutputRegister32(), i.InputRegister32(0), i.InputOperand2_32(1));

640 break;	639 break;

641 case kArm64Sub:	640 case kArm64Sub:

642 __ Sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));	641 __ Sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2_64(1));

643 break;	642 break;

644 case kArm64Sub32:	643 case kArm64Sub32:

645 if (FlagsModeField::decode(opcode) != kFlags_none) {	644 if (instr->flags_mode() != kFlags_none) {

646 __ Subs(i.OutputRegister32(), i.InputRegister32(0),	645 __ Subs(i.OutputRegister32(), i.InputRegister32(0),

647 i.InputOperand2_32(1));	646 i.InputOperand2_32(1));

648 } else {	647 } else {

649 __ Sub(i.OutputRegister32(), i.InputRegister32(0),	648 __ Sub(i.OutputRegister32(), i.InputRegister32(0),

650 i.InputOperand2_32(1));	649 i.InputOperand2_32(1));

651 }	650 }

652 break;	651 break;

653 case kArm64Lsl:	652 case kArm64Lsl:

654 ASSEMBLE_SHIFT(Lsl, 64);	653 ASSEMBLE_SHIFT(Lsl, 64);

655 break;	654 break;

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
741 break;	740 break;

742 case kArm64Cmn32:	741 case kArm64Cmn32:

743 __ Cmn(i.InputRegister32(0), i.InputOperand32(1));	742 __ Cmn(i.InputRegister32(0), i.InputOperand32(1));

744 break;	743 break;

745 case kArm64Tst:	744 case kArm64Tst:

746 __ Tst(i.InputRegister(0), i.InputOperand(1));	745 __ Tst(i.InputRegister(0), i.InputOperand(1));

747 break;	746 break;

748 case kArm64Tst32:	747 case kArm64Tst32:

749 __ Tst(i.InputRegister32(0), i.InputOperand32(1));	748 __ Tst(i.InputRegister32(0), i.InputOperand32(1));

750 break;	749 break;

	750 case kArm64Float32CmpAndFloat32Sel:
	Benedikt Meurer 2015/09/21 17:31:43 Hm, this instruction looks redundant. Why do you need it? jbramley 2015/09/22 07:45:20 On 2015/09/21 17:31:43, Benedikt Meurer wrote: > Hm, this instruction looks redundant. Why do you need it? Because, although I haven't done so yet, we might want the type of the select arguments to differ from the type of the comparison. Encoding that in the architecture-specific part of the opcode seemed the most sensible thing to do.
	751 DCHECK(instr->flags_mode() == kFlags_select);

	752 // Fall through.

751 case kArm64Float32Cmp:	753 case kArm64Float32Cmp:

752 if (instr->InputAt(1)->IsDoubleRegister()) {	754 if (instr->InputAt(1)->IsDoubleRegister()) {

753 __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));	755 __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));

754 } else {	756 } else {

755 DCHECK(instr->InputAt(1)->IsImmediate());	757 DCHECK(instr->InputAt(1)->IsImmediate());

756 // 0.0 is the only immediate supported by fcmp instructions.	758 // 0.0 is the only immediate supported by fcmp instructions.

757 DCHECK(i.InputDouble(1) == 0.0);	759 DCHECK(i.InputDouble(1) == 0.0);

758 __ Fcmp(i.InputFloat32Register(0), i.InputDouble(1));	760 __ Fcmp(i.InputFloat32Register(0), i.InputDouble(1));

759 }	761 }

760 break;	762 break;

(...skipping 20 matching lines...) Expand all Loading...
781 case kArm64Float32Min:	783 case kArm64Float32Min:

782 __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),	784 __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),

783 i.InputFloat32Register(1));	785 i.InputFloat32Register(1));

784 break;	786 break;

785 case kArm64Float32Abs:	787 case kArm64Float32Abs:

786 __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));	788 __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));

787 break;	789 break;

788 case kArm64Float32Sqrt:	790 case kArm64Float32Sqrt:

789 __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));	791 __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));

790 break;	792 break;

	793 case kArm64Float64CmpAndFloat64Sel:
	Benedikt Meurer 2015/09/21 17:31:43 Same here?
	794 DCHECK(instr->flags_mode() == kFlags_select);

	795 // Fall through.

791 case kArm64Float64Cmp:	796 case kArm64Float64Cmp:

792 if (instr->InputAt(1)->IsDoubleRegister()) {	797 if (instr->InputAt(1)->IsDoubleRegister()) {

793 __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));	798 __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));

794 } else {	799 } else {

795 DCHECK(instr->InputAt(1)->IsImmediate());	800 DCHECK(instr->InputAt(1)->IsImmediate());

796 // 0.0 is the only immediate supported by fcmp instructions.	801 // 0.0 is the only immediate supported by fcmp instructions.

797 DCHECK(i.InputDouble(1) == 0.0);	802 DCHECK(i.InputDouble(1) == 0.0);

798 __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));	803 __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));

799 }	804 }

800 break;	805 break;

(...skipping 261 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1062	1067

1063 // Materialize a full 64-bit 1 or 0 value. The result register is always the	1068 // Materialize a full 64-bit 1 or 0 value. The result register is always the

1064 // last output of the instruction.	1069 // last output of the instruction.

1065 DCHECK_NE(0u, instr->OutputCount());	1070 DCHECK_NE(0u, instr->OutputCount());

1066 Register reg = i.OutputRegister(instr->OutputCount() - 1);	1071 Register reg = i.OutputRegister(instr->OutputCount() - 1);

1067 Condition cc = FlagsConditionToCondition(condition);	1072 Condition cc = FlagsConditionToCondition(condition);

1068 __ Cset(reg, cc);	1073 __ Cset(reg, cc);

1069 }	1074 }

1070	1075

1071	1076

	1077 void CodeGenerator::AssembleArchSelect(Instruction* instr,

	1078 FlagsCondition condition) {

	1079 Arm64OperandConverter i(this, instr);

	1080

	1081 DCHECK_EQ(1u, instr->OutputCount());

	1082

	1083 Condition cc = FlagsConditionToCondition(condition);

	1084 switch (instr->arch_opcode()) {

	1085 case kArm64Float32CmpAndFloat32Sel: {
	Benedikt Meurer 2015/09/21 17:31:43 I'm not sure if this approach scales well, because the select is conceptually independent of the comparison and of the operands of the comparison. Did you consider using different kinds of Select, i.e. SelectFloat32, SelectFloat64 and so on? jbramley 2015/09/22 07:45:21 On 2015/09/21 17:31:43, Benedikt Meurer wrote: > I'm not sure if this approach scales well, because the select is conceptually independent of the comparison and of the operands of the comparison. Did you consider using different kinds of Select, i.e. SelectFloat32, SelectFloat64 and so on? Indeed, but (unless I'm mistaken), TF can't track flags dependencies between instructions. In all other cases, instructions that produce flags always consume them (either explicitly or using a FlagsContinuation). Is there a better way to handle that?
	1086 DoubleRegister result = i.OutputFloat32Register();

	1087 DoubleRegister a = i.InputFloat32Register(instr->InputCount() - 2);

	1088 DoubleRegister b = i.InputFloat32Register(instr->InputCount() - 1);

	1089 __ Fcsel(result, a, b, cc);

	1090 return;

	1091 }

	1092 case kArm64Float64CmpAndFloat64Sel: {

	1093 DoubleRegister result = i.OutputFloat64Register();

	1094 DoubleRegister a = i.InputFloat64Register(instr->InputCount() - 2);

	1095 DoubleRegister b = i.InputFloat64Register(instr->InputCount() - 1);

	1096 __ Fcsel(result, a, b, cc);

	1097 return;

	1098 }

	1099 default:

	1100 UNREACHABLE();

	1101 return;

	1102 }

	1103 }

	1104

	1105

1072 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {	1106 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {

1073 Arm64OperandConverter i(this, instr);	1107 Arm64OperandConverter i(this, instr);

1074 Register input = i.InputRegister32(0);	1108 Register input = i.InputRegister32(0);

1075 for (size_t index = 2; index < instr->InputCount(); index += 2) {	1109 for (size_t index = 2; index < instr->InputCount(); index += 2) {

1076 __ Cmp(input, i.InputInt32(index + 0));	1110 __ Cmp(input, i.InputInt32(index + 0));

1077 __ B(eq, GetLabel(i.InputRpo(index + 1)));	1111 __ B(eq, GetLabel(i.InputRpo(index + 1)));

1078 }	1112 }

1079 AssembleArchJump(i.InputRpo(1));	1113 AssembleArchJump(i.InputRpo(1));

1080 }	1114 }

1081	1115

(...skipping 318 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1400 padding_size -= kInstructionSize;	1434 padding_size -= kInstructionSize;

1401 }	1435 }

1402 }	1436 }

1403 }	1437 }

1404	1438

1405 #undef __	1439 #undef __

1406	1440

1407 } // namespace compiler	1441 } // namespace compiler

1408 } // namespace internal	1442 } // namespace internal

1409 } // namespace v8	1443 } // namespace v8

OLD	NEW

« no previous file with comments | « src/compiler/arm/code-generator-arm.cc ('k') | src/compiler/arm64/instruction-codes-arm64.h » ('j') | no next file with comments »