OLD | NEW |
1 // Copyright 2013 the V8 project authors. All rights reserved. | 1 // Copyright 2013 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/arm64/codegen-arm64.h" | 5 #include "src/arm64/codegen-arm64.h" |
6 | 6 |
7 #if V8_TARGET_ARCH_ARM64 | 7 #if V8_TARGET_ARCH_ARM64 |
8 | 8 |
9 #include "src/arm64/simulator-arm64.h" | 9 #include "src/arm64/simulator-arm64.h" |
10 #include "src/codegen.h" | 10 #include "src/codegen.h" |
11 #include "src/macro-assembler.h" | 11 #include "src/macro-assembler.h" |
12 | 12 |
13 namespace v8 { | 13 namespace v8 { |
14 namespace internal { | 14 namespace internal { |
15 | 15 |
16 #define __ ACCESS_MASM(masm) | 16 #define __ ACCESS_MASM(masm) |
17 | 17 |
// Simulator-only support: everything up to the matching #endif is compiled
// only when USE_SIMULATOR is defined.
18 #if defined(USE_SIMULATOR) | |
// Address of the generated exp() code. Starts as nullptr and is assigned the
// code buffer by CreateExpFunction() in its USE_SIMULATOR branch.
19 byte* fast_exp_arm64_machine_code = nullptr; | |
// Runs the code at fast_exp_arm64_machine_code through the isolate's current
// Simulator, passing x as the only argument, and returns the double result.
// NOTE(review): nothing here checks that fast_exp_arm64_machine_code is
// non-null; presumably callers only obtain this function from
// CreateExpFunction() after the pointer has been set -- confirm.
20 double fast_exp_simulator(double x, Isolate* isolate) { | |
21 Simulator * simulator = Simulator::current(isolate); | |
// Argument list for the simulated call; CallArgument::End() terminates it.
22 Simulator::CallArgument args[] = { | |
23 Simulator::CallArgument(x), | |
24 Simulator::CallArgument::End() | |
25 }; | |
26 return simulator->CallDouble(fast_exp_arm64_machine_code, args); | |
27 } | |
28 #endif | |
29 | |
30 | |
// Generates a standalone exp() routine into a freshly allocated buffer and
// returns a callable for it. Returns nullptr if the buffer allocation fails.
// Without USE_SIMULATOR the buffer itself is returned, cast to a function
// pointer; with USE_SIMULATOR the buffer address is stored in
// fast_exp_arm64_machine_code and &fast_exp_simulator is returned instead.
// Nothing in this function releases the buffer.
31 UnaryMathFunctionWithIsolate CreateExpFunction(Isolate* isolate) { | |
32 // Use the Math.exp implementation in MathExpGenerator::EmitMathExp() to create | |
33 // an AAPCS64-compliant exp() function. This will be faster than the C | |
34 // library's exp() function, but probably less accurate. | |
35 size_t actual_size; | |
// Request 1 KB; the size actually obtained is written to actual_size and is
// the size used for the assembler, the cache flush and the protection call.
// NOTE(review): the third Allocate() argument presumably requests executable
// memory -- confirm against base::OS::Allocate.
36 byte* buffer = | |
37 static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true)); | |
38 if (buffer == nullptr) return nullptr; | |
39 | |
// EmitMathExp() DCHECKs that the exp constants exist, so initialize them
// before emitting any code.
40 ExternalReference::InitializeMathExpData(); | |
41 MacroAssembler masm(isolate, buffer, static_cast<int>(actual_size), | |
42 CodeObjectRequired::kNo); | |
43 masm.SetStackPointer(csp); | |
44 | |
45 // The argument will be in d0 on entry. | |
46 DoubleRegister input = d0; | |
47 // Use other caller-saved registers for all other values. | |
48 DoubleRegister result = d1; | |
49 DoubleRegister double_temp1 = d2; | |
50 DoubleRegister double_temp2 = d3; | |
51 Register temp1 = x10; | |
52 Register temp2 = x11; | |
53 Register temp3 = x12; | |
54 | |
55 MathExpGenerator::EmitMathExp(&masm, input, result, | |
56 double_temp1, double_temp2, | |
57 temp1, temp2, temp3); | |
58 // Move the result to the return register. | |
59 masm.Fmov(d0, result); | |
60 masm.Ret(); | |
61 | |
62 CodeDesc desc; | |
63 masm.GetCode(&desc); | |
// The code is run directly from this raw buffer, so it is expected to need
// no relocation (checked in debug builds only).
64 DCHECK(!RelocInfo::RequiresRelocation(desc)); | |
65 | |
// Flush the instruction cache for the buffer, then apply ProtectCode() to it,
// before the code is handed out.
66 Assembler::FlushICache(isolate, buffer, actual_size); | |
67 base::OS::ProtectCode(buffer, actual_size); | |
68 | |
69 #if !defined(USE_SIMULATOR) | |
70 return FUNCTION_CAST<UnaryMathFunctionWithIsolate>(buffer); | |
71 #else | |
// Under the simulator the generated code is not called directly: record its
// address and return the C++ wrapper that runs it through the Simulator.
72 fast_exp_arm64_machine_code = buffer; | |
73 return &fast_exp_simulator; | |
74 #endif | |
75 } | |
76 | |
77 | |
// No generated sqrt routine is built here: this always returns nullptr and
// does not use the isolate argument.
78 UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) { | 18 UnaryMathFunctionWithIsolate CreateSqrtFunction(Isolate* isolate) { |
79 return nullptr; | 19 return nullptr; |
80 } | 20 } |
81 | 21 |
82 | 22 |
83 // ------------------------------------------------------------------------- | 23 // ------------------------------------------------------------------------- |
84 // Platform-specific RuntimeCallHelper functions. | 24 // Platform-specific RuntimeCallHelper functions. |
85 | 25 |
86 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { | 26 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const { |
87 masm->EnterFrame(StackFrame::INTERNAL); | 27 masm->EnterFrame(StackFrame::INTERNAL); |
(...skipping 415 matching lines...)
503 __ TestAndBranchIfAnySet(result, kStringEncodingMask, &one_byte); | 443 __ TestAndBranchIfAnySet(result, kStringEncodingMask, &one_byte); |
504 // Two-byte string. | 444 // Two-byte string. |
505 __ Ldrh(result, MemOperand(string, index, SXTW, 1)); | 445 __ Ldrh(result, MemOperand(string, index, SXTW, 1)); |
506 __ B(&done); | 446 __ B(&done); |
507 __ Bind(&one_byte); | 447 __ Bind(&one_byte); |
508 // One-byte string. | 448 // One-byte string. |
509 __ Ldrb(result, MemOperand(string, index, SXTW)); | 449 __ Ldrb(result, MemOperand(string, index, SXTW)); |
510 __ Bind(&done); | 450 __ Bind(&done); |
511 } | 451 } |
512 | 452 |
513 | |
// Returns a MemOperand addressing slot `index` relative to `base`, i.e. a
// byte offset of index * kDoubleSize. EmitMathExp() uses it with the base of
// the math_exp_constants array.
514 static MemOperand ExpConstant(Register base, int index) { | |
515 return MemOperand(base, index * kDoubleSize); | |
516 } | |
517 | |
518 | |
// Emits, through masm, code that computes an estimate of exp(input) and
// leaves it in `result`.
//   input         - double register holding the argument; only read here.
//   result        - receives the value; also used as scratch (double_temp3)
//                   while the value is being computed.
//   double_temp1, double_temp2 - scratch double registers.
//   temp1, temp2, temp3        - scratch core registers; temp3 first holds the
//                   constants base address and later the log-table address.
// Debug-build preconditions: no two of the registers alias, the exp constants
// have an address (see ExternalReference::InitializeMathExpData()), and the
// serializer is not enabled.
519 void MathExpGenerator::EmitMathExp(MacroAssembler* masm, | |
520 DoubleRegister input, | |
521 DoubleRegister result, | |
522 DoubleRegister double_temp1, | |
523 DoubleRegister double_temp2, | |
524 Register temp1, | |
525 Register temp2, | |
526 Register temp3) { | |
527 // TODO(jbramley): There are several instances where fnmsub could be used | |
528 // instead of fmul and fsub. Doing this changes the result, but since this is | |
529 // an estimation anyway, does it matter? | |
530 | |
531 DCHECK(!AreAliased(input, result, | |
532 double_temp1, double_temp2, | |
533 temp1, temp2, temp3)); | |
534 DCHECK(ExternalReference::math_exp_constants(0).address() != NULL); | |
535 DCHECK(!masm->serializer_enabled()); // External references not serializable. | |
536 | |
537 Label done; | |
// Register reuse: `result` doubles as a third scratch double, and temp3 holds
// the base address of the constants array.
538 DoubleRegister double_temp3 = result; | |
539 Register constants = temp3; | |
540 | |
541 // The algorithm used relies on some magic constants which are initialized in | |
542 // ExternalReference::InitializeMathExpData(). | |
543 | |
544 // Load the address of the start of the array. | |
545 __ Mov(constants, ExternalReference::math_exp_constants(0)); | |
546 | |
547 // We have to do a four-way split here: | |
548 // - If input <= about -708.4, the output always rounds to zero. | |
549 // - If input >= about 709.8, the output always rounds to +infinity. | |
550 // - If the input is NaN, the output is NaN. | |
551 // - Otherwise, the result needs to be calculated. | |
552 Label result_is_finite_non_zero; | |
553 // Assert that we can load offset 0 (the small input threshold) and offset 1 | |
554 // (the large input threshold) with a single ldp. | |
555 DCHECK(kDRegSize == (ExpConstant(constants, 1).offset() - | |
556 ExpConstant(constants, 0).offset())); | |
557 __ Ldp(double_temp1, double_temp2, ExpConstant(constants, 0)); | |
558 | |
// Compare against the small threshold first; the second comparison (against
// the large threshold) only happens when the first one yields 'hi', otherwise
// the flags are set to NoFlag.
559 __ Fcmp(input, double_temp1); | |
560 __ Fccmp(input, double_temp2, NoFlag, hi); | |
561 // At this point, the condition flags can be in one of five states: | |
562 // NZCV | |
563 // 1000 -708.4 < input < 709.8 result = exp(input) | |
564 // 0110 input == 709.8 result = +infinity | |
565 // 0010 input > 709.8 result = +infinity | |
566 // 0011 input is NaN result = input | |
567 // 0000 input <= -708.4 result = +0.0 | |
568 | |
569 // Continue the common case first. 'mi' tests N == 1. | |
570 __ B(&result_is_finite_non_zero, mi); | |
571 | |
572 // TODO(jbramley): Consider adding a +infinity register for ARM64. | |
573 __ Ldr(double_temp2, ExpConstant(constants, 2)); // Synthesize +infinity. | |
574 | |
575 // Select between +0.0 and +infinity. 'lo' tests C == 0. | |
576 __ Fcsel(result, fp_zero, double_temp2, lo); | |
577 // Select between {+0.0 or +infinity} and input. 'vc' tests V == 0. | |
578 __ Fcsel(result, result, input, vc); | |
579 __ B(&done); | |
580 | |
581 // The rest is magic, as described in InitializeMathExpData(). | |
582 __ Bind(&result_is_finite_non_zero); | |
583 | |
584 // Assert that we can load offset 3 and offset 4 with a single ldp. | |
585 DCHECK(kDRegSize == (ExpConstant(constants, 4).offset() - | |
586 ExpConstant(constants, 3).offset())); | |
587 __ Ldp(double_temp1, double_temp3, ExpConstant(constants, 3)); | |
// double_temp1 = constant[3] * input + constant[4]. temp2 takes a 32-bit copy
// (W <- S view) of that value before constant[4] is subtracted again.
588 __ Fmadd(double_temp1, double_temp1, input, double_temp3); | |
589 __ Fmov(temp2.W(), double_temp1.S()); | |
590 __ Fsub(double_temp1, double_temp1, double_temp3); | |
591 | |
592 // Assert that we can load offset 5 and offset 6 with a single ldp. | |
593 DCHECK(kDRegSize == (ExpConstant(constants, 6).offset() - | |
594 ExpConstant(constants, 5).offset())); | |
595 __ Ldp(double_temp2, double_temp3, ExpConstant(constants, 5)); | |
596 // TODO(jbramley): Consider using Fnmsub here. | |
597 __ Fmul(double_temp1, double_temp1, double_temp2); | |
598 __ Fsub(double_temp1, double_temp1, input); | |
599 | |
600 __ Fmul(double_temp2, double_temp1, double_temp1); | |
601 __ Fsub(double_temp3, double_temp3, double_temp1); | |
602 __ Fmul(double_temp3, double_temp3, double_temp2); | |
603 | |
// temp1 = temp2 >> 11 (logical, 32-bit): the bits above the 11 bits that are
// later masked out of temp2 as the table index.
604 __ Mov(temp1.W(), Operand(temp2.W(), LSR, 11)); | |
605 | |
606 __ Ldr(double_temp2, ExpConstant(constants, 7)); | |
607 // TODO(jbramley): Consider using Fnmsub here. | |
608 __ Fmul(double_temp3, double_temp3, double_temp2); | |
609 __ Fsub(double_temp3, double_temp3, double_temp1); | |
610 | |
611 // The 8th constant is 1.0, so use an immediate move rather than a load. | |
612 // We can't generate a runtime assertion here as we would need to call Abort | |
613 // in the runtime and we don't have an Isolate when we generate this code. | |
614 __ Fmov(double_temp2, 1.0); | |
615 __ Fadd(double_temp3, double_temp3, double_temp2); | |
616 | |
// temp2 keeps only its low 11 bits (the log-table index). temp1 gets 0x3ff
// added -- presumably the IEEE-754 double exponent bias; confirm against
// InitializeMathExpData().
617 __ And(temp2, temp2, 0x7ff); | |
618 __ Add(temp1, temp1, 0x3ff); | |
619 | |
620 // Do the final table lookup. | |
// temp3 is overwritten here: the constants base address is no longer needed.
621 __ Mov(temp3, ExternalReference::math_exp_log_table()); | |
622 | |
// Address the entry at index temp2 (entries are scaled by kDRegSizeLog2), load
// its two 32-bit words into temp2/temp3, OR (temp1 << 20) into the second
// word, insert that word into bits 63:32 of temp2, and move the combined
// 64-bit pattern into double_temp1.
623 __ Add(temp3, temp3, Operand(temp2, LSL, kDRegSizeLog2)); | |
624 __ Ldp(temp2.W(), temp3.W(), MemOperand(temp3)); | |
625 __ Orr(temp1.W(), temp3.W(), Operand(temp1.W(), LSL, 20)); | |
626 __ Bfi(temp2, temp1, 32, 32); | |
627 __ Fmov(double_temp1, temp2); | |
628 | |
// double_temp3 aliases `result`, so this multiplies in place to produce the
// final value.
629 __ Fmul(result, double_temp3, double_temp1); | |
630 | |
// Both the special-case path and the computed path end here with the value in
// `result`.
631 __ Bind(&done); | |
632 } | |
633 | |
634 #undef __ | 453 #undef __ |
635 | 454 |
636 } // namespace internal | 455 } // namespace internal |
637 } // namespace v8 | 456 } // namespace v8 |
638 | 457 |
639 #endif // V8_TARGET_ARCH_ARM64 | 458 #endif // V8_TARGET_ARCH_ARM64 |
OLD | NEW |