| Index: src/arm64/codegen-arm64.cc
|
| diff --git a/src/arm64/codegen-arm64.cc b/src/arm64/codegen-arm64.cc
|
| index edd289900e978d6d63ce8bb13f076bb272f1fd8f..990dd4101fe04ba51a07af6af6732b367c42157b 100644
|
| --- a/src/arm64/codegen-arm64.cc
|
| +++ b/src/arm64/codegen-arm64.cc
|
| @@ -14,6 +14,66 @@
|
| namespace internal {
|
|
|
| #define __ ACCESS_MASM(masm)
|
| +
|
| +#if defined(USE_SIMULATOR)
|
| +byte* fast_exp_arm64_machine_code = nullptr;
|
| +double fast_exp_simulator(double x, Isolate* isolate) {
|
| + Simulator * simulator = Simulator::current(isolate);
|
| + Simulator::CallArgument args[] = {
|
| + Simulator::CallArgument(x),
|
| + Simulator::CallArgument::End()
|
| + };
|
| + return simulator->CallDouble(fast_exp_arm64_machine_code, args);
|
| +}
|
| +#endif
|
| +
|
| +
|
| +UnaryMathFunctionWithIsolate CreateExpFunction(Isolate* isolate) {
|
| + // Use the Math.exp implemetation in MathExpGenerator::EmitMathExp() to create
|
| + // an AAPCS64-compliant exp() function. This will be faster than the C
|
| + // library's exp() function, but probably less accurate.
|
| + size_t actual_size;
|
| + byte* buffer =
|
| + static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true));
|
| + if (buffer == nullptr) return nullptr;
|
| +
|
| + ExternalReference::InitializeMathExpData();
|
| + MacroAssembler masm(isolate, buffer, static_cast<int>(actual_size),
|
| + CodeObjectRequired::kNo);
|
| + masm.SetStackPointer(csp);
|
| +
|
| + // The argument will be in d0 on entry.
|
| + DoubleRegister input = d0;
|
| + // Use other caller-saved registers for all other values.
|
| + DoubleRegister result = d1;
|
| + DoubleRegister double_temp1 = d2;
|
| + DoubleRegister double_temp2 = d3;
|
| + Register temp1 = x10;
|
| + Register temp2 = x11;
|
| + Register temp3 = x12;
|
| +
|
| + MathExpGenerator::EmitMathExp(&masm, input, result,
|
| + double_temp1, double_temp2,
|
| + temp1, temp2, temp3);
|
| + // Move the result to the return register.
|
| + masm.Fmov(d0, result);
|
| + masm.Ret();
|
| +
|
| + CodeDesc desc;
|
| + masm.GetCode(&desc);
|
| + DCHECK(!RelocInfo::RequiresRelocation(desc));
|
| +
|
| + Assembler::FlushICache(isolate, buffer, actual_size);
|
| + base::OS::ProtectCode(buffer, actual_size);
|
| +
|
| +#if !defined(USE_SIMULATOR)
|
| + return FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer);
|
| +#else
|
| + fast_exp_arm64_machine_code = buffer;
|
| + return &fast_exp_simulator;
|
| +#endif
|
| +}
|
| +
|
|
|
| UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) {
|
| return nullptr;
|
| @@ -450,6 +510,127 @@
|
| __ Bind(&done);
|
| }
|
|
|
| +
|
| +static MemOperand ExpConstant(Register base, int index) {
|
| + return MemOperand(base, index * kDoubleSize);
|
| +}
|
| +
|
| +
|
| +void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
|
| + DoubleRegister input,
|
| + DoubleRegister result,
|
| + DoubleRegister double_temp1,
|
| + DoubleRegister double_temp2,
|
| + Register temp1,
|
| + Register temp2,
|
| + Register temp3) {
|
| + // TODO(jbramley): There are several instances where fnmsub could be used
|
| + // instead of fmul and fsub. Doing this changes the result, but since this is
|
| + // an estimation anyway, does it matter?
|
| +
|
| + DCHECK(!AreAliased(input, result,
|
| + double_temp1, double_temp2,
|
| + temp1, temp2, temp3));
|
| + DCHECK(ExternalReference::math_exp_constants(0).address() != NULL);
|
| + DCHECK(!masm->serializer_enabled()); // External references not serializable.
|
| +
|
| + Label done;
|
| + DoubleRegister double_temp3 = result;
|
| + Register constants = temp3;
|
| +
|
| + // The algorithm used relies on some magic constants which are initialized in
|
| + // ExternalReference::InitializeMathExpData().
|
| +
|
| + // Load the address of the start of the array.
|
| + __ Mov(constants, ExternalReference::math_exp_constants(0));
|
| +
|
| + // We have to do a four-way split here:
|
| + // - If input <= about -708.4, the output always rounds to zero.
|
| + // - If input >= about 709.8, the output always rounds to +infinity.
|
| + // - If the input is NaN, the output is NaN.
|
| + // - Otherwise, the result needs to be calculated.
|
| + Label result_is_finite_non_zero;
|
| + // Assert that we can load offset 0 (the small input threshold) and offset 1
|
| + // (the large input threshold) with a single ldp.
|
| + DCHECK(kDRegSize == (ExpConstant(constants, 1).offset() -
|
| + ExpConstant(constants, 0).offset()));
|
| + __ Ldp(double_temp1, double_temp2, ExpConstant(constants, 0));
|
| +
|
| + __ Fcmp(input, double_temp1);
|
| + __ Fccmp(input, double_temp2, NoFlag, hi);
|
| + // At this point, the condition flags can be in one of five states:
|
| + // NZCV
|
| + // 1000 -708.4 < input < 709.8 result = exp(input)
|
| + // 0110 input == 709.8 result = +infinity
|
| + // 0010 input > 709.8 result = +infinity
|
| + // 0011 input is NaN result = input
|
| + // 0000 input <= -708.4 result = +0.0
|
| +
|
| + // Continue the common case first. 'mi' tests N == 1.
|
| + __ B(&result_is_finite_non_zero, mi);
|
| +
|
| + // TODO(jbramley): Consider adding a +infinity register for ARM64.
|
| + __ Ldr(double_temp2, ExpConstant(constants, 2)); // Synthesize +infinity.
|
| +
|
| + // Select between +0.0 and +infinity. 'lo' tests C == 0.
|
| + __ Fcsel(result, fp_zero, double_temp2, lo);
|
| + // Select between {+0.0 or +infinity} and input. 'vc' tests V == 0.
|
| + __ Fcsel(result, result, input, vc);
|
| + __ B(&done);
|
| +
|
| + // The rest is magic, as described in InitializeMathExpData().
|
| + __ Bind(&result_is_finite_non_zero);
|
| +
|
| + // Assert that we can load offset 3 and offset 4 with a single ldp.
|
| + DCHECK(kDRegSize == (ExpConstant(constants, 4).offset() -
|
| + ExpConstant(constants, 3).offset()));
|
| + __ Ldp(double_temp1, double_temp3, ExpConstant(constants, 3));
|
| + __ Fmadd(double_temp1, double_temp1, input, double_temp3);
|
| + __ Fmov(temp2.W(), double_temp1.S());
|
| + __ Fsub(double_temp1, double_temp1, double_temp3);
|
| +
|
| + // Assert that we can load offset 5 and offset 6 with a single ldp.
|
| + DCHECK(kDRegSize == (ExpConstant(constants, 6).offset() -
|
| + ExpConstant(constants, 5).offset()));
|
| + __ Ldp(double_temp2, double_temp3, ExpConstant(constants, 5));
|
| + // TODO(jbramley): Consider using Fnmsub here.
|
| + __ Fmul(double_temp1, double_temp1, double_temp2);
|
| + __ Fsub(double_temp1, double_temp1, input);
|
| +
|
| + __ Fmul(double_temp2, double_temp1, double_temp1);
|
| + __ Fsub(double_temp3, double_temp3, double_temp1);
|
| + __ Fmul(double_temp3, double_temp3, double_temp2);
|
| +
|
| + __ Mov(temp1.W(), Operand(temp2.W(), LSR, 11));
|
| +
|
| + __ Ldr(double_temp2, ExpConstant(constants, 7));
|
| + // TODO(jbramley): Consider using Fnmsub here.
|
| + __ Fmul(double_temp3, double_temp3, double_temp2);
|
| + __ Fsub(double_temp3, double_temp3, double_temp1);
|
| +
|
| + // The 8th constant is 1.0, so use an immediate move rather than a load.
|
| + // We can't generate a runtime assertion here as we would need to call Abort
|
| + // in the runtime and we don't have an Isolate when we generate this code.
|
| + __ Fmov(double_temp2, 1.0);
|
| + __ Fadd(double_temp3, double_temp3, double_temp2);
|
| +
|
| + __ And(temp2, temp2, 0x7ff);
|
| + __ Add(temp1, temp1, 0x3ff);
|
| +
|
| + // Do the final table lookup.
|
| + __ Mov(temp3, ExternalReference::math_exp_log_table());
|
| +
|
| + __ Add(temp3, temp3, Operand(temp2, LSL, kDRegSizeLog2));
|
| + __ Ldp(temp2.W(), temp3.W(), MemOperand(temp3));
|
| + __ Orr(temp1.W(), temp3.W(), Operand(temp1.W(), LSL, 20));
|
| + __ Bfi(temp2, temp1, 32, 32);
|
| + __ Fmov(double_temp1, temp2);
|
| +
|
| + __ Fmul(result, double_temp3, double_temp1);
|
| +
|
| + __ Bind(&done);
|
| +}
|
| +
|
| #undef __
|
|
|
| } // namespace internal
|
|
|