Index: src/x64/codegen-x64.cc |
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc |
index 9575a294eba7d7d54a71076ac679318e65131722..5f65c7f7a273e3571a7d559d022c95fa9377fcb9 100644 |
--- a/src/x64/codegen-x64.cc |
+++ b/src/x64/codegen-x64.cc |
@@ -4517,19 +4517,19 @@ void CodeGenerator::GenerateCallFunction(ZoneList<Expression*>* args) { |
void CodeGenerator::GenerateMathSin(ZoneList<Expression*>* args) { |
ASSERT_EQ(args->length(), 1); |
- // Load the argument on the stack and jump to the runtime. |
Load(args->at(0)); |
- Result answer = frame_->CallRuntime(Runtime::kMath_sin, 1); |
- frame_->Push(&answer); |
+ TranscendentalCacheStub stub(TranscendentalCache::SIN); |
+ Result result = frame_->CallStub(&stub, 1); |
+ frame_->Push(&result); |
} |
void CodeGenerator::GenerateMathCos(ZoneList<Expression*>* args) { |
ASSERT_EQ(args->length(), 1); |
- // Load the argument on the stack and jump to the runtime. |
Load(args->at(0)); |
- Result answer = frame_->CallRuntime(Runtime::kMath_cos, 1); |
- frame_->Push(&answer); |
+ TranscendentalCacheStub stub(TranscendentalCache::COS); |
+ Result result = frame_->CallStub(&stub, 1); |
+ frame_->Push(&result); |
} |
@@ -7397,6 +7397,218 @@ bool CodeGenerator::FoldConstantSmis(Token::Value op, int left, int right) { |
// End of CodeGenerator implementation. |
+void TranscendentalCacheStub::Generate(MacroAssembler* masm) { |
+ // Input on stack: |
+ // rsp[8]: argument (should be number). |
+ // rsp[0]: return address. |
+ // Test that eax is a number. |
Mads Ager (chromium)
2010/05/03 09:14:46
eax -> rax
Move to after the label declarations?
Lasse Reichstein
2010/05/03 10:33:44
Done.
|
+ Label runtime_call; |
+ Label runtime_call_clear_stack; |
+ Label input_not_smi; |
+ Label loaded; |
+ __ movq(rax, Operand(rsp, kPointerSize)); |
+ __ JumpIfNotSmi(rax, &input_not_smi); |
+ // Input is a smi. Untag and load it onto the FPU stack. |
+ // Then load the bits of the double into rbx. |
+ ASSERT_EQ(1, kSmiTagSize); |
Mads Ager (chromium)
2010/05/03 09:14:46
Does the assert add anything when you are using th
Lasse Reichstein
2010/05/03 10:33:44
Removed.
|
+ __ SmiToInteger32(rax, rax); |
+ __ subq(rsp, Immediate(kPointerSize)); |
+ __ cvtlsi2sd(xmm1, rax); |
+ __ movsd(Operand(rsp, 0), xmm1); |
+ __ movq(rbx, xmm1); |
+ __ movq(rdx, xmm1); |
+ __ fld_d(Operand(rsp, 0)); |
+ __ addq(rsp, Immediate(kPointerSize)); |
+ __ jmp(&loaded); |
+ |
+ __ bind(&input_not_smi); |
+ // Check if input is a HeapNumber. |
+ __ Move(rbx, Factory::heap_number_map()); |
+ __ cmpq(rbx, FieldOperand(rax, HeapObject::kMapOffset)); |
Mads Ager (chromium)
2010/05/03 09:14:46
Can we use CompareRoot here?
Lasse Reichstein
2010/05/03 10:33:44
We can, but in this case I don't want to. Using th
|
+ __ j(not_equal, &runtime_call); |
+ // Input is a HeapNumber. Push it on the FPU stack and load its |
+ // bits into rbx. |
+ __ fld_d(FieldOperand(rax, HeapNumber::kValueOffset)); |
+ __ movq(rbx, FieldOperand(rax, HeapNumber::kValueOffset)); |
+ __ movq(rdx, rbx); |
+ __ bind(&loaded); |
+ // ST[0] == double value |
+ // rbx = bits of double value. |
+ // rdx = also bits of double value. |
+ // Compute hash (h is 32 bits, bits are 64): |
+ // h = h0 = bits ^ (bits >> 32); |
+ // h ^= h >> 16; |
+ // h ^= h >> 8; |
+ // h = h & (cacheSize - 1); |
+ // or h = (h0 ^ (h0 >> 8) ^ (h0 >> 16) ^ (h0 >> 24)) & cacheSize - 1 |
Mads Ager (chromium)
2010/05/03 09:14:46
add parenthesis (cacheSize - 1)?
Lasse Reichstein
2010/05/03 10:33:44
Done.
|
+ __ sar(rdx, Immediate(32)); |
+ __ xorl(rdx, rbx); |
+ __ movl(rcx, rdx); |
+ __ movl(rax, rdx); |
+ __ movl(rdi, rdx); |
+ __ sarl(rdx, Immediate(8)); |
+ __ sarl(rcx, Immediate(16)); |
+ __ sarl(rax, Immediate(24)); |
+ __ xorl(rcx, rdx); |
+ __ xorl(rax, rdi); |
+ __ xorl(rcx, rax); |
+ ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize)); |
+ __ andl(rcx, Immediate(TranscendentalCache::kCacheSize - 1)); |
+ // ST[0] == double value. |
+ // rbx = bits of double value. |
+ // rcx = TranscendentalCache::hash(double value). |
+ __ movq(rax, ExternalReference::transcendental_cache_array_address()); |
+ // rax points to cache array. |
+ __ movq(rax, Operand(rax, type_ * sizeof(TranscendentalCache::caches_[0]))); |
+ // rax points to the cache for the type type_. |
+ // If NULL, the cache hasn't been initialized yet, so go through runtime. |
+ __ testq(rax, rax); |
+ __ j(zero, &runtime_call_clear_stack); |
+#ifdef DEBUG |
+ // Check that the layout of cache elements match expectations. |
+ { // NOLINT - doesn't like a single brace on a line. |
Mads Ager (chromium)
2010/05/03 09:14:46
Will the linter be happy with:
{ Transcendenta
Lasse Reichstein
2010/05/03 10:33:44
It probably will, but I'd hate it instead. I'd rat
|
+ TranscendentalCache::Element test_elem[2]; |
+ char* elem_start = reinterpret_cast<char*>(&test_elem[0]); |
+ char* elem2_start = reinterpret_cast<char*>(&test_elem[1]); |
+ char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0])); |
+ char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1])); |
+ char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output)); |
+ // Two uint_32's and a pointer per element. |
+ CHECK_EQ(16, static_cast<int>(elem2_start - elem_start)); |
+ CHECK_EQ(0, static_cast<int>(elem_in0 - elem_start)); |
+ CHECK_EQ(kIntSize, static_cast<int>(elem_in1 - elem_start)); |
+ CHECK_EQ(2 * kIntSize, static_cast<int>(elem_out - elem_start)); |
+ } |
+#endif |
+ // Find the address of the rcx'th entry in the cache, i.e., &rax[rcx*16]. |
+ __ addl(rcx, rcx); |
+ __ lea(rcx, Operand(rax, rcx, times_8, 0));\ |
Mads Ager (chromium)
2010/05/03 09:14:46
Remove '\' at end of line.
Lasse Reichstein
2010/05/03 10:33:44
Whoops.
|
+ // Check if cache matches: Double value is stored in uint32_t[2] array. |
+ Label cache_miss; |
+ __ cmpq(rbx, Operand(rcx, 0)); |
+ __ j(not_equal, &cache_miss); |
+ // Cache hit! |
+ __ movq(rax, Operand(rcx, 2 * kIntSize)); |
+ __ fstp(0); // Clear FPU stack. |
+ __ ret(kPointerSize); |
+ |
+ __ bind(&cache_miss); |
+ // Update cache with new value. |
+ // We are short on registers, so use no_reg as scratch. |
Mads Ager (chromium)
2010/05/03 09:14:46
This is a left over comment from ia32, remove?
Lasse Reichstein
2010/05/03 10:33:44
Removed.
|
+ // This gives slightly larger code. |
+ __ AllocateHeapNumber(rax, rdi, &runtime_call_clear_stack); |
+ GenerateOperation(masm); |
+ __ movq(Operand(rcx, 0), rbx); |
+ __ movq(Operand(rcx, 2 * kIntSize), rax); |
+ __ fstp_d(FieldOperand(rax, HeapNumber::kValueOffset)); |
+ __ ret(kPointerSize); |
+ |
+ __ bind(&runtime_call_clear_stack); |
+ __ fstp(0); |
+ __ bind(&runtime_call); |
+ __ TailCallExternalReference(ExternalReference(RuntimeFunction()), 1, 1); |
+} |
+ |
+ |
+Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() { |
+ switch (type_) { |
+ // Add more cases when necessary. |
+ case TranscendentalCache::SIN: return Runtime::kMath_sin; |
+ case TranscendentalCache::COS: return Runtime::kMath_cos; |
+ default: |
+ UNIMPLEMENTED(); |
+ return Runtime::kAbort; |
+ } |
+} |
+ |
+ |
+void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) { |
+ // Only free register is edi. |
Mads Ager (chromium)
2010/05/03 09:14:46
rdi
Lasse Reichstein
2010/05/03 10:33:44
fixed.
|
+ Label done; |
+ ASSERT(type_ == TranscendentalCache::SIN || |
+ type_ == TranscendentalCache::COS); |
+ // More transcendental types can be added later. |
+ |
+ // Both fsin and fcos require arguments in the range +/-2^63 and |
+ // return NaN for infinities and NaN. They can share all code except |
+ // the actual fsin/fcos operation. |
+ Label in_range; |
+ // If argument is outside the range -2^63..2^63, fsin/cos doesn't |
+ // work. We must reduce it to the appropriate range. |
+ __ movq(rdi, rbx); |
+ __ shr(rdi, Immediate(52)); // Exponent |
+ __ andl(rdi, Immediate(0x7ff)); |
Mads Ager (chromium)
2010/05/03 09:14:46
Should we use a couple of named constants for this
Lasse Reichstein
2010/05/03 10:33:44
Fixed.
|
+ int supported_exponent_limit = |
+ (63 + HeapNumber::kExponentBias); |
Mads Ager (chromium)
2010/05/03 09:14:46
This will fit on line above?
Lasse Reichstein
2010/05/03 10:33:44
Done.
|
+ __ cmpl(rdi, Immediate(supported_exponent_limit)); |
+ __ j(below, &in_range); |
+ // Check for infinity and NaN. Both return NaN for sin. |
+ __ cmpl(rdi, Immediate(0x7ff)); |
+ Label non_nan_result; |
+ __ j(not_equal, &non_nan_result); |
+ // Input is +/-Infinity or NaN. Result is NaN. |
+ __ fstp(0); // Clear fpu stack. |
+ // NaN is represented by 0x7ff8000000000000. |
+ __ movq(rdi, static_cast<uint64_t>(0x7ff8)<<48, RelocInfo::NONE); |
Mads Ager (chromium)
2010/05/03 09:14:46
We should use a named constant for the nan represe
Lasse Reichstein
2010/05/03 10:33:44
Done.
|
+ __ push(rdi); |
+ __ fld_d(Operand(rsp, 0)); |
+ __ addq(rsp, Immediate(kPointerSize)); |
+ __ jmp(&done); |
+ |
+ __ bind(&non_nan_result); |
+ |
+ // Use fpmod to restrict argument to the range +/-2*PI. |
+ __ movq(rdi, rax); // Save rax before using fnstsw_ax. |
+ __ fldpi(); |
+ __ fadd(0); |
+ __ fld(1); |
+ // FPU Stack: input, 2*pi, input. |
+ { |
+ Label no_exceptions; |
+ __ fwait(); |
+ __ fnstsw_ax(); |
+ // Clear if Illegal Operand or Zero Division exceptions are set. |
+ __ testl(rax, Immediate(5)); |
+ __ j(zero, &no_exceptions); |
+ __ fnclex(); |
+ __ bind(&no_exceptions); |
+ } |
+ |
+ // Compute st(0) % st(1) |
+ { |
+ Label partial_remainder_loop; |
+ __ bind(&partial_remainder_loop); |
+ __ fprem1(); |
+ __ fwait(); |
+ __ fnstsw_ax(); |
+ __ testl(rax, Immediate(0x400 /* C2 */)); |
Mads Ager (chromium)
2010/05/03 09:14:46
We usually do not put comment in here. This seems
Lasse Reichstein
2010/05/03 10:33:44
C2 moved to comment.
|
+ // If C2 is set, computation only has partial result. Loop to |
+ // continue computation. |
+ __ j(not_zero, &partial_remainder_loop); |
+ } |
+ // FPU Stack: input, 2*pi, input % 2*pi |
+ __ fstp(2); |
+ // FPU Stack: input % 2*pi, 2*pi, |
+ __ fstp(0); |
+ // FPU Stack: input % 2*pi |
+ __ movq(rax, rdi); // Restore eax (allocated HeapNumber pointer). |
Lasse Reichstein
2010/05/03 10:33:44
eax->rax.
|
+ |
+ // FPU Stack: input % 2*pi |
+ __ bind(&in_range); |
+ switch (type_) { |
+ case TranscendentalCache::SIN: |
+ __ fsin(); |
+ break; |
+ case TranscendentalCache::COS: |
+ __ fcos(); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ } |
+ __ bind(&done); |
+} |
+ |
+ |
// Get the integer part of a heap number. Surprisingly, all this bit twiddling |
// is faster than using the built-in instructions on floating point registers. |
// Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the |