src/ia32/code-stubs-ia32.cc - Issue 5996002: Add untagged double versions of Math.sin and Math.cos.

Unified Diff: src/ia32/code-stubs-ia32.cc

Issue 5996002: Add untagged double versions of Math.sin and Math.cos. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/ia32/code-stubs-ia32.cc

===================================================================

--- src/ia32/code-stubs-ia32.cc (revision 6081)

+++ src/ia32/code-stubs-ia32.cc (working copy)

@@ -2472,41 +2472,66 @@

void TranscendentalCacheStub::Generate(MacroAssembler* masm) {

- // Input on stack:

- // esp[4]: argument (should be number).

- // esp[0]: return address.

- // Test that eax is a number.

+ // TAGGED case:

+ // Input:

+ // esp[4]: tagged number input argument (should be number).

+ // esp[0]: return address.

+ // Output:

+ // eax: tagged double result.

+ // UNTAGGED cast:

Lasse Reichstein 2010/12/20 13:45:49 cast->case.

William Hesse 2010/12/20 13:52:04 Done.

+ // Input::

+ // esp[0]: return address.

+ // xmm1: untagged double input argument

+ // Output:

+ // xmm1: untagged double result.

Label runtime_call;

Label runtime_call_clear_stack;

- NearLabel input_not_smi;

- NearLabel loaded;

- __ mov(eax, Operand(esp, kPointerSize));

- __ test(eax, Immediate(kSmiTagMask));

- __ j(not_zero, &input_not_smi);

- // Input is a smi. Untag and load it onto the FPU stack.

- // Then load the low and high words of the double into ebx, edx.

- STATIC_ASSERT(kSmiTagSize == 1);

- __ sar(eax, 1);

- __ sub(Operand(esp), Immediate(2 * kPointerSize));

- __ mov(Operand(esp, 0), eax);

- __ fild_s(Operand(esp, 0));

- __ fst_d(Operand(esp, 0));

- __ pop(edx);

- __ pop(ebx);

- __ jmp(&loaded);

- __ bind(&input_not_smi);

- // Check if input is a HeapNumber.

- __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));

- __ cmp(Operand(ebx), Immediate(Factory::heap_number_map()));

- __ j(not_equal, &runtime_call);

- // Input is a HeapNumber. Push it on the FPU stack and load its

- // low and high words into ebx, edx.

- __ fld_d(FieldOperand(eax, HeapNumber::kValueOffset));

- __ mov(edx, FieldOperand(eax, HeapNumber::kExponentOffset));

- __ mov(ebx, FieldOperand(eax, HeapNumber::kMantissaOffset));

+ Label skip_cache;

+ Label call_runtime;

+ const bool tagged = (argument_type_ == TAGGED);

+ if (tagged) {

+ // Test that eax is a number.

+ NearLabel input_not_smi;

+ NearLabel loaded;

+ __ mov(eax, Operand(esp, kPointerSize));

+ __ test(eax, Immediate(kSmiTagMask));

+ __ j(not_zero, &input_not_smi);

+ // Input is a smi. Untag and load it onto the FPU stack.

+ // Then load the low and high words of the double into ebx, edx.

+ STATIC_ASSERT(kSmiTagSize == 1);

+ __ sar(eax, 1);

+ __ sub(Operand(esp), Immediate(2 * kPointerSize));

+ __ mov(Operand(esp, 0), eax);

+ __ fild_s(Operand(esp, 0));

+ __ fst_d(Operand(esp, 0));

+ __ pop(edx);

+ __ pop(ebx);

+ __ jmp(&loaded);

+ __ bind(&input_not_smi);

+ // Check if input is a HeapNumber.

+ __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));

+ __ cmp(Operand(ebx), Immediate(Factory::heap_number_map()));

+ __ j(not_equal, &runtime_call);

+ // Input is a HeapNumber. Push it on the FPU stack and load its

+ // low and high words into ebx, edx.

+ __ fld_d(FieldOperand(eax, HeapNumber::kValueOffset));

+ __ mov(edx, FieldOperand(eax, HeapNumber::kExponentOffset));

+ __ mov(ebx, FieldOperand(eax, HeapNumber::kMantissaOffset));

- __ bind(&loaded);

- // ST[0] == double value

+ __ bind(&loaded);

+ } else { // UNTAGGED.

+ if (CpuFeatures::IsSupported(SSE4_1)) {

+ CpuFeatures::Scope sse4_scope(SSE4_1);

+ __ pextrd(Operand(edx), xmm1, 0x1); // copy xmm1[63..32] to edx.

+ } else {

+ __ pshufd(xmm0, xmm1, 0x1);

+ __ movd(Operand(edx), xmm0);

+ }

+ __ movd(Operand(ebx), xmm1);

+ }

+ // ST[0] or xmm1 == double value

// ebx = low 32 bits of double value

// edx = high 32 bits of double value

// Compute hash (the shifts are arithmetic):

@@ -2522,7 +2547,7 @@

ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));

__ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));

- // ST[0] == double value.

+ // ST[0] or xmm1 == double value.

// ebx = low 32 bits of double value.

// edx = high 32 bits of double value.

// ecx = TranscendentalCache::hash(double value).

@@ -2559,31 +2584,72 @@

__ j(not_equal, &cache_miss);

// Cache hit!

__ mov(eax, Operand(ecx, 2 * kIntSize));

- __ fstp(0);

- __ ret(kPointerSize);

+ if (tagged) {

+ __ fstp(0);

+ __ ret(kPointerSize);

+ } else { // UNTAGGED.

+ __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

+ __ Ret();

+ }

__ bind(&cache_miss);

// Update cache with new value.

// We are short on registers, so use no_reg as scratch.

// This gives slightly larger code.

- __ AllocateHeapNumber(eax, edi, no_reg, &runtime_call_clear_stack);

+ if (tagged) {

+ __ AllocateHeapNumber(eax, edi, no_reg, &runtime_call_clear_stack);

+ } else { // UNTAGGED.

Lasse Reichstein 2010/12/20 13:45:49 Do we need to allocate a number in the UNTAGGED case…

+ __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);

+ __ sub(Operand(esp), Immediate(kDoubleSize));

+ __ movdbl(Operand(esp, 0), xmm1);

+ __ fld_d(Operand(esp, 0));

+ __ add(Operand(esp), Immediate(kDoubleSize));

+ }

GenerateOperation(masm);

__ mov(Operand(ecx, 0), ebx);

__ mov(Operand(ecx, kIntSize), edx);

__ mov(Operand(ecx, 2 * kIntSize), eax);

__ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));

- __ ret(kPointerSize);

+ if (tagged) {

+ __ ret(kPointerSize);

+ } else { // UNTAGGED.

+ __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

+ __ Ret();

- __ bind(&runtime_call_clear_stack);

- __ fstp(0);

- __ bind(&runtime_call);

- __ TailCallExternalReference(ExternalReference(RuntimeFunction()), 1, 1);

+ // Skip cache and return answer directly, only in untagged case.

+ __ bind(&skip_cache);

Lasse Reichstein 2010/12/20 13:45:49 Does this mean that we don't cache the result in t…

William Hesse 2010/12/20 13:52:04 We only don't cache the result in the untagged case…

+ __ sub(Operand(esp), Immediate(kDoubleSize));

+ __ movdbl(Operand(esp, 0), xmm1);

+ __ fld_d(Operand(esp, 0));

+ GenerateOperation(masm);

+ __ fstp_d(Operand(esp, 0));

+ __ movdbl(xmm1, Operand(esp, 0));

+ __ add(Operand(esp), Immediate(kDoubleSize));

+ __ Ret();

+ }

+ // Call runtime, doing whatever allocation and cleanup is necessary.

+ if (tagged) {

+ __ bind(&runtime_call_clear_stack);

+ __ fstp(0);

+ __ bind(&runtime_call);

+ __ TailCallExternalReference(ExternalReference(RuntimeFunction()), 1, 1);

+ } else { // UNTAGGED.

+ __ bind(&call_runtime);

+ __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);

+ __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm1);

+ __ EnterInternalFrame();

+ __ push(eax);

+ __ CallRuntime(RuntimeFunction(), 1);

+ __ LeaveInternalFrame();

+ __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

+ __ Ret();

+ }

}

Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() {

switch (type_) {

- // Add more cases when necessary.

case TranscendentalCache::SIN: return Runtime::kMath_sin;

case TranscendentalCache::COS: return Runtime::kMath_cos;

case TranscendentalCache::LOG: return Runtime::kMath_log;

@@ -2596,14 +2662,14 @@

void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {

// Only free register is edi.

- // Input value is on FP stack, and also in ebx/edx. Address of result

- // (a newly allocated HeapNumber) is in eax.

- NearLabel done;

+ // Input value is on FP stack, and also in ebx/edx.

+ // Input value is possibly in xmm1.

+ // Address of result (a newly allocated HeapNumber) may be in eax.

if (type_ == TranscendentalCache::SIN || type_ == TranscendentalCache::COS) {

// Both fsin and fcos require arguments in the range +/-2^63 and

// return NaN for infinities and NaN. They can share all code except

// the actual fsin/fcos operation.

- NearLabel in_range;

+ NearLabel in_range, done;

// If argument is outside the range -2^63..2^63, fsin/cos doesn't

// work. We must reduce it to the appropriate range.

__ mov(edi, edx);

@@ -2683,145 +2749,6 @@

}

-void TranscendentalCacheSSE2Stub::Generate(MacroAssembler* masm) {

- // Input on stack:

- // esp[0]: return address.

- // Input in registers:

- // xmm1: untagged double input argument.

- // Output:

- // xmm1: untagged double result.

- Label skip_cache;

- Label call_runtime;

- // Input is an untagged double in xmm1.

- // Compute hash (the shifts are arithmetic):

- // h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);

- if (CpuFeatures::IsSupported(SSE4_1)) {

- CpuFeatures::Scope sse4_scope(SSE4_1);

- __ pextrd(Operand(edx), xmm1, 0x1); // copy xmm1[63..32] to edx.

- } else {

- __ pshufd(xmm0, xmm1, 0x1);

- __ movd(Operand(edx), xmm0);

- }

- __ movd(Operand(ebx), xmm1);

- // xmm1 = double value

- // ebx = low 32 bits of double value

- // edx = high 32 bits of double value

- // Compute hash (the shifts are arithmetic):

- // h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);

- __ mov(ecx, ebx);

- __ xor_(ecx, Operand(edx));

- __ mov(eax, ecx);

- __ sar(eax, 16);

- __ xor_(ecx, Operand(eax));

- __ mov(eax, ecx);

- __ sar(eax, 8);

- __ xor_(ecx, Operand(eax));

- ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));

- __ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));

- // xmm1 = double value.

- // ebx = low 32 bits of double value.

- // edx = high 32 bits of double value.

- // ecx = TranscendentalCache::hash(double value).

- __ mov(eax,

- Immediate(ExternalReference::transcendental_cache_array_address()));

- // Eax points to cache array.

- __ mov(eax, Operand(eax, type_ * sizeof(TranscendentalCache::caches_[0])));

- // Eax points to the cache for the type type_.

- // If NULL, the cache hasn't been initialized yet, so go through runtime.

- __ test(eax, Operand(eax));

- __ j(zero, &call_runtime);

-#ifdef DEBUG

- // Check that the layout of cache elements match expectations.

- { TranscendentalCache::Element test_elem[2];

- char* elem_start = reinterpret_cast<char*>(&test_elem[0]);

- char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);

- char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));

- char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));

- char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));

- CHECK_EQ(12, elem2_start - elem_start); // Two uint_32's and a pointer.

- CHECK_EQ(0, elem_in0 - elem_start);

- CHECK_EQ(kIntSize, elem_in1 - elem_start);

- CHECK_EQ(2 * kIntSize, elem_out - elem_start);

- }

-#endif

- // Find the address of the ecx'th entry in the cache, i.e., &eax[ecx*12].

- __ lea(ecx, Operand(ecx, ecx, times_2, 0));

- __ lea(ecx, Operand(eax, ecx, times_4, 0));

- // Check if cache matches: Double value is stored in uint32_t[2] array.

- NearLabel cache_miss;

- __ cmp(ebx, Operand(ecx, 0));

- __ j(not_equal, &cache_miss);

- __ cmp(edx, Operand(ecx, kIntSize));

- __ j(not_equal, &cache_miss);

- // Cache hit!

- __ mov(eax, Operand(ecx, 2 * kIntSize));

- __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

- __ Ret();

- __ bind(&cache_miss);

- // Update cache with new value.

- // We are short on registers, so use no_reg as scratch.

- // This gives slightly larger code.

- __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);

- __ sub(Operand(esp), Immediate(kDoubleSize));

- __ movdbl(Operand(esp, 0), xmm1);

- __ fld_d(Operand(esp, 0));

- __ add(Operand(esp), Immediate(kDoubleSize));

- GenerateOperation(masm);

- __ mov(Operand(ecx, 0), ebx);

- __ mov(Operand(ecx, kIntSize), edx);

- __ mov(Operand(ecx, 2 * kIntSize), eax);

- __ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));

- __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

- __ Ret();

- __ bind(&skip_cache);

- __ sub(Operand(esp), Immediate(kDoubleSize));

- __ movdbl(Operand(esp, 0), xmm1);

- __ fld_d(Operand(esp, 0));

- GenerateOperation(masm);

- __ fstp_d(Operand(esp, 0));

- __ movdbl(xmm1, Operand(esp, 0));

- __ add(Operand(esp), Immediate(kDoubleSize));

- __ Ret();

- __ bind(&call_runtime);

- __ AllocateHeapNumber(eax, edi, no_reg, &skip_cache);

- __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm1);

- __ EnterInternalFrame();

- __ push(eax);

- __ CallRuntime(RuntimeFunction(), 1);

- __ LeaveInternalFrame();

- __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));

- __ Ret();

-Runtime::FunctionId TranscendentalCacheSSE2Stub::RuntimeFunction() {

- switch (type_) {

- // Add more cases when necessary.

- case TranscendentalCache::LOG: return Runtime::kMath_log;

- default:

- UNIMPLEMENTED();

- return Runtime::kAbort;

- }

-void TranscendentalCacheSSE2Stub::GenerateOperation(MacroAssembler* masm) {

- // Only free register is edi.

- // Input value is on FP stack and in xmm1.

- ASSERT(type_ == TranscendentalCache::LOG);

- __ fldln2();

- __ fxch();

- __ fyl2x();

// Get the integer part of a heap number. Surprisingly, all this bit twiddling

// is faster than using the built-in instructions on floating point registers.

// Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the

« no previous file with comments | « src/ia32/code-stubs-ia32.h ('k') | src/ia32/codegen-ia32.cc » ('j') | no next file with comments »