OLD | NEW |
1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/arm64/codegen-arm64.h" | 5 #include "src/arm64/codegen-arm64.h" |
6 | 6 |
7 #if V8_TARGET_ARCH_ARM64 | 7 #if V8_TARGET_ARCH_ARM64 |
8 | 8 |
9 #include "src/arm64/simulator-arm64.h" | 9 #include "src/arm64/simulator-arm64.h" |
10 #include "src/codegen.h" | 10 #include "src/codegen.h" |
11 #include "src/macro-assembler.h" | 11 #include "src/macro-assembler.h" |
12 | 12 |
13 namespace v8 { | 13 namespace v8 { |
14 namespace internal { | 14 namespace internal { |
15 | 15 |
16 #define __ ACCESS_MASM(masm) | 16 #define __ ACCESS_MASM(masm) |
17 | 17 |
| 18 #if defined(USE_SIMULATOR) |
| 19 byte* fast_exp_arm64_machine_code = nullptr; |
| 20 double fast_exp_simulator(double x, Isolate* isolate) { |
| 21 Simulator* simulator = Simulator::current(isolate); |
| 22 Simulator::CallArgument args[] = { |
| 23 Simulator::CallArgument(x), |
| 24 Simulator::CallArgument::End() |
| 25 }; |
| 26 return simulator->CallDouble(fast_exp_arm64_machine_code, args); |
| 27 } |
| 28 #endif |
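
Note (reviewer sketch, not part of the patch): this #if block exists because in
a simulator build the buffer filled by CreateExpFunction() holds ARM64
instructions that the host CPU cannot execute directly, so the argument is
marshalled through Simulator::CallDouble(). In a native build the same buffer
is callable as-is, because the generated code follows AAPCS64 (argument in d0,
result returned in d0), which is exactly what the non-simulator branch at the
end of CreateExpFunction() relies on:

    // Native-build path (as done at the end of CreateExpFunction() below):
    UnaryMathFunctionWithIsolate f =
        FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer);
    double y = f(x, isolate);  // Jumps straight into the generated code.
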
| 29 |
| 30 |
| 31 UnaryMathFunctionWithIsolate CreateExpFunction(Isolate* isolate) { |
| 32 // Use the Math.exp implementation in MathExpGenerator::EmitMathExp() to create |
| 33 // an AAPCS64-compliant exp() function. This will be faster than the C |
| 34 // library's exp() function, but probably less accurate. |
| 35 size_t actual_size; |
| 36 byte* buffer = |
| 37 static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true)); |
| 38 if (buffer == nullptr) return nullptr; |
| 39 |
| 40 ExternalReference::InitializeMathExpData(); |
| 41 MacroAssembler masm(isolate, buffer, static_cast<int>(actual_size), |
| 42 CodeObjectRequired::kNo); |
| 43 masm.SetStackPointer(csp); |
| 44 |
| 45 // The argument will be in d0 on entry. |
| 46 DoubleRegister input = d0; |
| 47 // Use other caller-saved registers for all other values. |
| 48 DoubleRegister result = d1; |
| 49 DoubleRegister double_temp1 = d2; |
| 50 DoubleRegister double_temp2 = d3; |
| 51 Register temp1 = x10; |
| 52 Register temp2 = x11; |
| 53 Register temp3 = x12; |
| 54 |
| 55 MathExpGenerator::EmitMathExp(&masm, input, result, |
| 56 double_temp1, double_temp2, |
| 57 temp1, temp2, temp3); |
| 58 // Move the result to the return register. |
| 59 masm.Fmov(d0, result); |
| 60 masm.Ret(); |
| 61 |
| 62 CodeDesc desc; |
| 63 masm.GetCode(&desc); |
| 64 DCHECK(!RelocInfo::RequiresRelocation(desc)); |
| 65 |
| 66 Assembler::FlushICache(isolate, buffer, actual_size); |
| 67 base::OS::ProtectCode(buffer, actual_size); |
| 68 |
| 69 #if !defined(USE_SIMULATOR) |
| 70 return FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer); |
| 71 #else |
| 72 fast_exp_arm64_machine_code = buffer; |
| 73 return &fast_exp_simulator; |
| 74 #endif |
| 75 } |
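
A minimal usage sketch (reviewer illustration, not part of the patch): the
function returns nullptr when the 1 KB code buffer cannot be allocated, so a
caller is assumed to keep the C library exp() as a fallback; 'isolate' stands
for a live Isolate*.

    #include <cmath>

    UnaryMathFunctionWithIsolate fast_exp = CreateExpFunction(isolate);
    double y = (fast_exp != nullptr) ? fast_exp(1.0, isolate)  // generated code
                                     : std::exp(1.0);          // libc fallback
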
| 76 |
| 77 |
18 UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) { | 78 UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) { |
19 return nullptr; | 79 return nullptr; |
20 } | 80 } |
21 | 81 |
22 | 82 |
23 // ------------------------------------------------------------------------- | 83 // ------------------------------------------------------------------------- |
24 // Platform-specific RuntimeCallHelper functions. | 84 // Platform-specific RuntimeCallHelper functions. |
25 | 85 |
26 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { | 86 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { |
27 masm->EnterFrame(StackFrame::INTERNAL); | 87 masm->EnterFrame(StackFrame::INTERNAL); |
(...skipping 415 matching lines...)
443 __ TestAndBranchIfAnySet(result, kStringEncodingMask, &one_byte); | 503 __ TestAndBranchIfAnySet(result, kStringEncodingMask, &one_byte); |
444 // Two-byte string. | 504 // Two-byte string. |
445 __ Ldrh(result, MemOperand(string, index, SXTW, 1)); | 505 __ Ldrh(result, MemOperand(string, index, SXTW, 1)); |
446 __ B(&done); | 506 __ B(&done); |
447 __ Bind(&one_byte); | 507 __ Bind(&one_byte); |
448 // One-byte string. | 508 // One-byte string. |
449 __ Ldrb(result, MemOperand(string, index, SXTW)); | 509 __ Ldrb(result, MemOperand(string, index, SXTW)); |
450 __ Bind(&done); | 510 __ Bind(&done); |
451 } | 511 } |
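
Reviewer note: the two loads above differ only in element size. A scalar
sketch, assuming (from the elided context) that 'string' has already been
adjusted to point at the character data; 'string_data' (a const uint8_t*) and
'index' below are hypothetical stand-ins for those registers:

    // Two-byte (UC16) string: the 'SXTW, 1' operand sign-extends the 32-bit
    // index and scales it by 2.
    uint16_t c2 = *reinterpret_cast<const uint16_t*>(string_data + (index << 1));
    // One-byte string: byte-sized elements, no scaling ('SXTW' alone).
    uint8_t c1 = string_data[index];
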
452 | 512 |
| 513 |
| 514 static MemOperand ExpConstant(Register base, int index) { |
| 515 return MemOperand(base, index * kDoubleSize); |
| 516 } |
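
Reviewer note: ExpConstant() simply indexes 'base' as an array of doubles, so
with kDoubleSize == 8:

    ExpConstant(constants, 0);  // MemOperand(constants, 0)
    ExpConstant(constants, 1);  // MemOperand(constants, 8)
    ExpConstant(constants, 4);  // MemOperand(constants, 32)

Adjacent constants therefore sit kDRegSize (8 bytes) apart, which is what the
DCHECKs in EmitMathExp() below verify before each paired Ldp load.
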
| 517 |
| 518 |
| 519 void MathExpGenerator::EmitMathExp(MacroAssembler* masm, |
| 520 DoubleRegister input, |
| 521 DoubleRegister result, |
| 522 DoubleRegister double_temp1, |
| 523 DoubleRegister double_temp2, |
| 524 Register temp1, |
| 525 Register temp2, |
| 526 Register temp3) { |
| 527 // TODO(jbramley): There are several instances where fnmsub could be used |
| 528 // instead of fmul and fsub. Doing this changes the result, but since this is |
| 529 // an estimation anyway, does it matter? |
| 530 |
| 531 DCHECK(!AreAliased(input, result, |
| 532 double_temp1, double_temp2, |
| 533 temp1, temp2, temp3)); |
| 534 DCHECK(ExternalReference::math_exp_constants(0).address() != nullptr); |
| 535 DCHECK(!masm->serializer_enabled()); // External references not serializable. |
| 536 |
| 537 Label done; |
| 538 DoubleRegister double_temp3 = result; |
| 539 Register constants = temp3; |
| 540 |
| 541 // The algorithm used relies on some magic constants which are initialized in |
| 542 // ExternalReference::InitializeMathExpData(). |
| 543 |
| 544 // Load the address of the start of the array. |
| 545 __ Mov(constants, ExternalReference::math_exp_constants(0)); |
| 546 |
| 547 // We have to do a four-way split here: |
| 548 // - If input <= about -708.4, the output always rounds to zero. |
| 549 // - If input >= about 709.8, the output always rounds to +infinity. |
| 550 // - If the input is NaN, the output is NaN. |
| 551 // - Otherwise, the result needs to be calculated. |
| 552 Label result_is_finite_non_zero; |
| 553 // Assert that we can load offset 0 (the small input threshold) and offset 1 |
| 554 // (the large input threshold) with a single ldp. |
| 555 DCHECK(kDRegSize == (ExpConstant(constants, 1).offset() - |
| 556 ExpConstant(constants, 0).offset())); |
| 557 __ Ldp(double_temp1, double_temp2, ExpConstant(constants, 0)); |
| 558 |
| 559 __ Fcmp(input, double_temp1); |
| 560 __ Fccmp(input, double_temp2, NoFlag, hi); |
| 561 // At this point, the condition flags can be in one of five states: |
| 562 // NZCV |
| 563 // 1000 -708.4 < input < 709.8 result = exp(input) |
| 564 // 0110 input == 709.8 result = +infinity |
| 565 // 0010 input > 709.8 result = +infinity |
| 566 // 0011 input is NaN result = input |
| 567 // 0000 input <= -708.4 result = +0.0 |
| 568 |
| 569 // Branch to the common case first. 'mi' tests N == 1. |
| 570 __ B(&result_is_finite_non_zero, mi); |
| 571 |
| 572 // TODO(jbramley): Consider adding a +infinity register for ARM64. |
| 573 __ Ldr(double_temp2, ExpConstant(constants, 2)); // Synthesize +infinity. |
| 574 |
| 575 // Select between +0.0 and +infinity. 'lo' tests C == 0. |
| 576 __ Fcsel(result, fp_zero, double_temp2, lo); |
| 577 // Select between {+0.0 or +infinity} and input. 'vc' tests V == 0. |
| 578 __ Fcsel(result, result, input, vc); |
| 579 __ B(&done); |
| 580 |
| 581 // The rest is magic, as described in InitializeMathExpData(). |
| 582 __ Bind(&result_is_finite_non_zero); |
| 583 |
| 584 // Assert that we can load offset 3 and offset 4 with a single ldp. |
| 585 DCHECK(kDRegSize == (ExpConstant(constants, 4).offset() - |
| 586 ExpConstant(constants, 3).offset())); |
| 587 __ Ldp(double_temp1, double_temp3, ExpConstant(constants, 3)); |
| 588 __ Fmadd(double_temp1, double_temp1, input, double_temp3); |
| 589 __ Fmov(temp2.W(), double_temp1.S()); |
| 590 __ Fsub(double_temp1, double_temp1, double_temp3); |
| 591 |
| 592 // Assert that we can load offset 5 and offset 6 with a single ldp. |
| 593 DCHECK(kDRegSize == (ExpConstant(constants, 6).offset() - |
| 594 ExpConstant(constants, 5).offset())); |
| 595 __ Ldp(double_temp2, double_temp3, ExpConstant(constants, 5)); |
| 596 // TODO(jbramley): Consider using Fnmsub here. |
| 597 __ Fmul(double_temp1, double_temp1, double_temp2); |
| 598 __ Fsub(double_temp1, double_temp1, input); |
| 599 |
| 600 __ Fmul(double_temp2, double_temp1, double_temp1); |
| 601 __ Fsub(double_temp3, double_temp3, double_temp1); |
| 602 __ Fmul(double_temp3, double_temp3, double_temp2); |
| 603 |
| 604 __ Mov(temp1.W(), Operand(temp2.W(), LSR, 11)); |
| 605 |
| 606 __ Ldr(double_temp2, ExpConstant(constants, 7)); |
| 607 // TODO(jbramley): Consider using Fnmsub here. |
| 608 __ Fmul(double_temp3, double_temp3, double_temp2); |
| 609 __ Fsub(double_temp3, double_temp3, double_temp1); |
| 610 |
| 611 // The 8th constant is 1.0, so use an immediate move rather than a load. |
| 612 // We can't generate a runtime assertion here as we would need to call Abort |
| 613 // in the runtime and we don't have an Isolate when we generate this code. |
| 614 __ Fmov(double_temp2, 1.0); |
| 615 __ Fadd(double_temp3, double_temp3, double_temp2); |
| 616 |
| 617 __ And(temp2, temp2, 0x7ff); |
| 618 __ Add(temp1, temp1, 0x3ff); |
| 619 |
| 620 // Do the final table lookup. |
| 621 __ Mov(temp3, ExternalReference::math_exp_log_table()); |
| 622 |
| 623 __ Add(temp3, temp3, Operand(temp2, LSL, kDRegSizeLog2)); |
| 624 __ Ldp(temp2.W(), temp3.W(), MemOperand(temp3)); |
| 625 __ Orr(temp1.W(), temp3.W(), Operand(temp1.W(), LSL, 20)); |
| 626 __ Bfi(temp2, temp1, 32, 32); |
| 627 __ Fmov(double_temp1, temp2); |
| 628 |
| 629 __ Fmul(result, double_temp3, double_temp1); |
| 630 |
| 631 __ Bind(&done); |
| 632 } |
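
Reviewer sketch (not part of the patch) of the special-case handling that the
Fcmp/Fccmp/Fcsel sequence implements, written as scalar C++. The threshold
values and compute_exp() below are placeholders: the real thresholds live in
the constants table and the real computation is the table-based approximation
set up by InitializeMathExpData(), which is not re-derived here.

    #include <cmath>
    #include <limits>

    // Placeholder constants and helper (stand-ins only).
    constexpr double kSmallInput = -708.4;
    constexpr double kLargeInput = 709.8;
    static double compute_exp(double x) { return std::exp(x); }

    double exp_with_special_cases(double input) {
      if (std::isnan(input)) return input;           // NZCV 0011: NaN in, NaN out.
      if (input <= kSmallInput) return +0.0;         // NZCV 0000: rounds to +0.0.
      if (input >= kLargeInput)                      // NZCV 0110 / 0010:
        return std::numeric_limits<double>::infinity();  //   rounds to +infinity.
      return compute_exp(input);                     // NZCV 1000: the common case.
    }
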
| 633 |
453 #undef __ | 634 #undef __ |
454 | 635 |
455 } // namespace internal | 636 } // namespace internal |
456 } // namespace v8 | 637 } // namespace v8 |
457 | 638 |
458 #endif // V8_TARGET_ARCH_ARM64 | 639 #endif // V8_TARGET_ARCH_ARM64 |