src/arm64/lithium-codegen-arm64.cc - Issue 258793002: ARM64: Generate optimized code for Math.floor and Math.round with double outputs.

Side by Side Diff: src/arm64/lithium-codegen-arm64.cc

Issue 258793002: ARM64: Generate optimized code for Math.floor and Math.round with double outputs. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 the V8 project authors. All rights reserved.	1 // Copyright 2013 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 3766 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3777 Register temp1 = ToRegister(instr->temp1());	3777 Register temp1 = ToRegister(instr->temp1());

3778 Register temp2 = ToRegister(instr->temp2());	3778 Register temp2 = ToRegister(instr->temp2());

3779 Register temp3 = ToRegister(instr->temp3());	3779 Register temp3 = ToRegister(instr->temp3());

3780	3780

3781 MathExpGenerator::EmitMathExp(masm(), input, result,	3781 MathExpGenerator::EmitMathExp(masm(), input, result,

3782 double_temp1, double_temp2,	3782 double_temp1, double_temp2,

3783 temp1, temp2, temp3);	3783 temp1, temp2, temp3);

3784 }	3784 }

3785	3785

3786	3786

3787 void LCodeGen::DoMathFloor(LMathFloor* instr) {	3787 void LCodeGen::DoMathFloorD(LMathFloorD* instr) {

3788 // TODO(jbramley): If we could provide a double result, we could use frintm	3788 DoubleRegister input = ToDoubleRegister(instr->value());

3789 // and produce a valid double result in a single instruction.	3789 DoubleRegister result = ToDoubleRegister(instr->result());

	3790

	3791 __ Frintm(result, input);

	3792 }

	3793

	3794

	3795 void LCodeGen::DoMathFloorI(LMathFloorI* instr) {

3790 DoubleRegister input = ToDoubleRegister(instr->value());	3796 DoubleRegister input = ToDoubleRegister(instr->value());

3791 Register result = ToRegister(instr->result());	3797 Register result = ToRegister(instr->result());

3792	3798

3793 if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {	3799 if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {

3794 DeoptimizeIfMinusZero(input, instr->environment());	3800 DeoptimizeIfMinusZero(input, instr->environment());

3795 }	3801 }

3796	3802

3797 __ Fcvtms(result, input);	3803 __ Fcvtms(result, input);

3798	3804

3799 // Check that the result fits into a 32-bit integer.	3805 // Check that the result fits into a 32-bit integer.

(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
4012 MathPowStub stub(isolate(), MathPowStub::INTEGER);	4018 MathPowStub stub(isolate(), MathPowStub::INTEGER);

4013 __ CallStub(&stub);	4019 __ CallStub(&stub);

4014 } else {	4020 } else {

4015 ASSERT(exponent_type.IsDouble());	4021 ASSERT(exponent_type.IsDouble());

4016 MathPowStub stub(isolate(), MathPowStub::DOUBLE);	4022 MathPowStub stub(isolate(), MathPowStub::DOUBLE);

4017 __ CallStub(&stub);	4023 __ CallStub(&stub);

4018 }	4024 }

4019 }	4025 }

4020	4026

4021	4027

4022 void LCodeGen::DoMathRound(LMathRound* instr) {	4028 void LCodeGen::DoMathRoundD(LMathRoundD* instr) {

4023 // TODO(jbramley): We could provide a double result here using frint.	4029 DoubleRegister input = ToDoubleRegister(instr->value());

	4030 DoubleRegister result = ToDoubleRegister(instr->result());

	4031 DoubleRegister scratch_d = crankshaft_fp_scratch;

	4032 Register scratch_x = ToRegister(instr->temp());

	4033

	4034 ASSERT(!AreAliased(input, result, scratch_d));

	4035

	4036 Label done;

	4037

	4038 // If the exponent is greater than or equal to the width of the mantissa (52),

	4039 // there is no fractional part.

	4040 __ Fmov(scratch_x, input);

	4041 __ Ubfx(scratch_x, scratch_x, kDoubleMantissaBits, kDoubleExponentBits);

	4042 __ Cmp(scratch_x, Operand(kDoubleExponentBias + kDoubleMantissaBits));

	4043 __ Fmov(result, input);

	4044 __ B(hs, &done);

	4045

	4046 // Otherwise except for the range yielding -0.0, the result is the same as

	4047 // floor(val + 0.5).

	4048 __ Fmov(scratch_d, 0.5);

	4049 __ Fadd(result, input, scratch_d);

	4050 __ Frintm(result, result);

	4051 __ Fcmp(result, 0.0);

	4052 __ B(ne, &done);

	4053

	4054 // Frintn computes the correct result for [-0.5, 0.5[.

	4055 __ Frintn(result, input);

	4056

	4057 __ Bind(&done);

	4058 }

	4059

	4060

	4061 void LCodeGen::DoMathRoundI(LMathRoundI* instr) {

4024 DoubleRegister input = ToDoubleRegister(instr->value());	4062 DoubleRegister input = ToDoubleRegister(instr->value());

4025 DoubleRegister temp1 = ToDoubleRegister(instr->temp1());	4063 DoubleRegister temp1 = ToDoubleRegister(instr->temp1());

4026 Register result = ToRegister(instr->result());	4064 Register result = ToRegister(instr->result());

4027 Label try_rounding;	4065 Label try_rounding;

4028 Label done;	4066 Label done;

4029	4067

4030 // Math.round() rounds to the nearest integer, with ties going towards	4068 // Math.round() rounds to the nearest integer, with ties going towards

4031 // +infinity. This does not match any IEEE-754 rounding mode.	4069 // +infinity. This does not match any IEEE-754 rounding mode.

4032 // - Infinities and NaNs are propagated unchanged, but cause deopts because	4070 // - Infinities and NaNs are propagated unchanged, but cause deopts because

4033 // they can't be represented as integers.	4071 // they can't be represented as integers.

(...skipping 18 matching lines...) Expand all Loading...
4052 __ Mov(result, 1); // +0.5.	4090 __ Mov(result, 1); // +0.5.

4053 // Remaining cases: [+0, +0.5[ or [-0.5, +0.5[, depending on	4091 // Remaining cases: [+0, +0.5[ or [-0.5, +0.5[, depending on

4054 // flag kBailoutOnMinusZero, will return 0 (xzr).	4092 // flag kBailoutOnMinusZero, will return 0 (xzr).

4055 __ Csel(result, result, xzr, eq);	4093 __ Csel(result, result, xzr, eq);

4056 __ B(&done);	4094 __ B(&done);

4057	4095

4058 __ Bind(&try_rounding);	4096 __ Bind(&try_rounding);

4059 // Since we're providing a 32-bit result, we can implement ties-to-infinity by	4097 // Since we're providing a 32-bit result, we can implement ties-to-infinity by

4060 // adding 0.5 to the input, then taking the floor of the result. This does not	4098 // adding 0.5 to the input, then taking the floor of the result. This does not

4061 // work for very large positive doubles because adding 0.5 would cause an	4099 // work for very large positive doubles because adding 0.5 would cause an

4062 // intermediate rounding stage, so a different approach will be necessary if a	4100 // intermediate rounding stage, so a different approach is necessary when a

4063 // double result is needed.	4101 // double result is needed.

4064 __ Fadd(temp1, input, dot_five);	4102 __ Fadd(temp1, input, dot_five);

4065 __ Fcvtms(result, temp1);	4103 __ Fcvtms(result, temp1);

4066	4104

4067 // Deopt if	4105 // Deopt if

4068 // * the input was NaN	4106 // * the input was NaN

4069 // * the result is not representable using a 32-bit integer.	4107 // * the result is not representable using a 32-bit integer.

4070 __ Fcmp(input, 0.0);	4108 __ Fcmp(input, 0.0);

4071 __ Ccmp(result, Operand(result.W(), SXTW), NoFlag, vc);	4109 __ Ccmp(result, Operand(result.W(), SXTW), NoFlag, vc);

4072 DeoptimizeIf(ne, instr->environment());	4110 DeoptimizeIf(ne, instr->environment());

(...skipping 1826 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5899 __ Ldr(result, FieldMemOperand(object, JSObject::kPropertiesOffset));	5937 __ Ldr(result, FieldMemOperand(object, JSObject::kPropertiesOffset));

5900 // Index is equal to negated out of object property index plus 1.	5938 // Index is equal to negated out of object property index plus 1.

5901 __ Sub(result, result, Operand::UntagSmiAndScale(index, kPointerSizeLog2));	5939 __ Sub(result, result, Operand::UntagSmiAndScale(index, kPointerSizeLog2));

5902 __ Ldr(result, FieldMemOperand(result,	5940 __ Ldr(result, FieldMemOperand(result,

5903 FixedArray::kHeaderSize - kPointerSize));	5941 FixedArray::kHeaderSize - kPointerSize));

5904 __ Bind(deferred->exit());	5942 __ Bind(deferred->exit());

5905 __ Bind(&done);	5943 __ Bind(&done);

5906 }	5944 }

5907	5945

5908 } } // namespace v8::internal	5946 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/arm64/lithium-arm64.cc ('k') | src/hydrogen-instructions.h » ('j') | src/hydrogen-instructions.cc » ('J')