src/x64/codegen-x64.cc - Issue 1860001: X64: Port inline transcendental cache to X64.

Side by Side Diff: src/x64/codegen-x64.cc

Issue 1860001: X64: Port inline transcendental cache to X64. (Closed)

Patch Set: Created 10 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 4499 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4510 Load(args->at(i + 1));	4510 Load(args->at(i + 1));

4511 }	4511 }

4512 Load(args->at(n_args + 1)); // function	4512 Load(args->at(n_args + 1)); // function

4513 Result result = frame_->CallJSFunction(n_args);	4513 Result result = frame_->CallJSFunction(n_args);

4514 frame_->Push(&result);	4514 frame_->Push(&result);

4515 }	4515 }

4516	4516

4517	4517

4518 void CodeGenerator::GenerateMathSin(ZoneList<Expression> args) {	4518 void CodeGenerator::GenerateMathSin(ZoneList<Expression> args) {

4519 ASSERT_EQ(args->length(), 1);	4519 ASSERT_EQ(args->length(), 1);

4520 // Load the argument on the stack and jump to the runtime.

4521 Load(args->at(0));	4520 Load(args->at(0));

4522 Result answer = frame_->CallRuntime(Runtime::kMath_sin, 1);	4521 TranscendentalCacheStub stub(TranscendentalCache::SIN);

4523 frame_->Push(&answer);	4522 Result result = frame_->CallStub(&stub, 1);

	4523 frame_->Push(&result);

4524 }	4524 }

4525	4525

4526	4526

4527 void CodeGenerator::GenerateMathCos(ZoneList<Expression> args) {	4527 void CodeGenerator::GenerateMathCos(ZoneList<Expression> args) {

4528 ASSERT_EQ(args->length(), 1);	4528 ASSERT_EQ(args->length(), 1);

4529 // Load the argument on the stack and jump to the runtime.

4530 Load(args->at(0));	4529 Load(args->at(0));

4531 Result answer = frame_->CallRuntime(Runtime::kMath_cos, 1);	4530 TranscendentalCacheStub stub(TranscendentalCache::COS);

4532 frame_->Push(&answer);	4531 Result result = frame_->CallStub(&stub, 1);

	4532 frame_->Push(&result);

4533 }	4533 }

4534	4534

4535	4535

4536 void CodeGenerator::GenerateStringAdd(ZoneList<Expression> args) {	4536 void CodeGenerator::GenerateStringAdd(ZoneList<Expression> args) {

4537 ASSERT_EQ(2, args->length());	4537 ASSERT_EQ(2, args->length());

4538	4538

4539 Load(args->at(0));	4539 Load(args->at(0));

4540 Load(args->at(1));	4540 Load(args->at(1));

4541	4541

4542 StringAddStub stub(NO_STRING_ADD_FLAGS);	4542 StringAddStub stub(NO_STRING_ADD_FLAGS);

(...skipping 2847 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7390 if (answer_object == Heap::undefined_value()) {	7390 if (answer_object == Heap::undefined_value()) {

7391 return false;	7391 return false;

7392 }	7392 }

7393 frame_->Push(Handle<Object>(answer_object));	7393 frame_->Push(Handle<Object>(answer_object));

7394 return true;	7394 return true;

7395 }	7395 }

7396	7396

7397	7397

7398 // End of CodeGenerator implementation.	7398 // End of CodeGenerator implementation.

7399	7399

	7400 void TranscendentalCacheStub::Generate(MacroAssembler* masm) {

	7401 // Input on stack:

	7402 // rsp[8]: argument (should be number).

	7403 // rsp[0]: return address.

	7404 // Test that eax is a number.
	Mads Ager (chromium) 2010/05/03 09:14:46: eax -> rax. Move to after the label declarations?
	Lasse Reichstein 2010/05/03 10:33:44: Done.
	7405 Label runtime_call;

	7406 Label runtime_call_clear_stack;

	7407 Label input_not_smi;

	7408 Label loaded;

	7409 __ movq(rax, Operand(rsp, kPointerSize));

	7410 __ JumpIfNotSmi(rax, &input_not_smi);

	7411 // Input is a smi. Untag and load it onto the FPU stack.

	7412 // Then load the bits of the double into rbx.

	7413 ASSERT_EQ(1, kSmiTagSize);
	Mads Ager (chromium) 2010/05/03 09:14:46: Does the assert add anything when you are using the SmiToInteger32 macro or should we just remove it?
	Lasse Reichstein 2010/05/03 10:33:44: Removed.
	7414 __ SmiToInteger32(rax, rax);

	7415 __ subq(rsp, Immediate(kPointerSize));

	7416 __ cvtlsi2sd(xmm1, rax);

	7417 __ movsd(Operand(rsp, 0), xmm1);

	7418 __ movq(rbx, xmm1);

	7419 __ movq(rdx, xmm1);

	7420 __ fld_d(Operand(rsp, 0));

	7421 __ addq(rsp, Immediate(kPointerSize));

	7422 __ jmp(&loaded);

	7423

	7424 __ bind(&input_not_smi);

	7425 // Check if input is a HeapNumber.

	7426 __ Move(rbx, Factory::heap_number_map());

	7427 __ cmpq(rbx, FieldOperand(rax, HeapObject::kMapOffset));
	Mads Ager (chromium) 2010/05/03 09:14:46: Can we use CompareRoot here?
	Lasse Reichstein 2010/05/03 10:33:44: We can, but in this case I don't want to. Using the root array costs a load and is somewhat shorter, but I'm afraid the load can't be hoisted very much since we are just after a conditional jump target, so we will be introducing extra latency.
	7428 __ j(not_equal, &runtime_call);

	7429 // Input is a HeapNumber. Push it on the FPU stack and load its

	7430 // bits into rbx.

	7431 __ fld_d(FieldOperand(rax, HeapNumber::kValueOffset));

	7432 __ movq(rbx, FieldOperand(rax, HeapNumber::kValueOffset));

	7433 __ movq(rdx, rbx);

	7434 __ bind(&loaded);

	7435 // ST[0] == double value

	7436 // rbx = bits of double value.

	7437 // rdx = also bits of double value.

	7438 // Compute hash (h is 32 bits, bits are 64):

	7439 // h = h0 = bits ^ (bits >> 32);

	7440 // h ^= h >> 16;

	7441 // h ^= h >> 8;

	7442 // h = h & (cacheSize - 1);

	7443 // or h = (h0 ^ (h0 >> 8) ^ (h0 >> 16) ^ (h0 >> 24)) & cacheSize - 1
	Mads Ager (chromium) 2010/05/03 09:14:46: add parenthesis (cacheSize - 1)?
	Lasse Reichstein 2010/05/03 10:33:44: Done.
	7444 __ sar(rdx, Immediate(32));

	7445 __ xorl(rdx, rbx);

	7446 __ movl(rcx, rdx);

	7447 __ movl(rax, rdx);

	7448 __ movl(rdi, rdx);

	7449 __ sarl(rdx, Immediate(8));

	7450 __ sarl(rcx, Immediate(16));

	7451 __ sarl(rax, Immediate(24));

	7452 __ xorl(rcx, rdx);

	7453 __ xorl(rax, rdi);

	7454 __ xorl(rcx, rax);

	7455 ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));

	7456 __ andl(rcx, Immediate(TranscendentalCache::kCacheSize - 1));

	7457 // ST[0] == double value.

	7458 // rbx = bits of double value.

	7459 // rcx = TranscendentalCache::hash(double value).

	7460 __ movq(rax, ExternalReference::transcendental_cache_array_address());

	7461 // rax points to cache array.

	7462 __ movq(rax, Operand(rax, type_ * sizeof(TranscendentalCache::caches_[0])));

	7463 // rax points to the cache for the type type_.

	7464 // If NULL, the cache hasn't been initialized yet, so go through runtime.

	7465 __ testq(rax, rax);

	7466 __ j(zero, &runtime_call_clear_stack);

	7467 #ifdef DEBUG

	7468 // Check that the layout of cache elements match expectations.

	7469 { // NOLINT - doesn't like a single brace on a line.
	Mads Ager (chromium) 2010/05/03 09:14:46: Will the linter be happy with: { TranscendentalCache::Element ...; ... } ?
	Lasse Reichstein 2010/05/03 10:33:44: It probably will, but I'd hate it instead. I'd rather remove the braces entirely.
	7470 TranscendentalCache::Element test_elem[2];

	7471 char* elem_start = reinterpret_cast<char*>(&test_elem[0]);

	7472 char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);

	7473 char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));

	7474 char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));

	7475 char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));

	7476 // Two uint_32's and a pointer per element.

	7477 CHECK_EQ(16, static_cast<int>(elem2_start - elem_start));

	7478 CHECK_EQ(0, static_cast<int>(elem_in0 - elem_start));

	7479 CHECK_EQ(kIntSize, static_cast<int>(elem_in1 - elem_start));

	7480 CHECK_EQ(2 * kIntSize, static_cast<int>(elem_out - elem_start));

	7481 }

	7482 #endif

	7483 // Find the address of the rcx'th entry in the cache, i.e., &rax[rcx*16].

	7484 __ addl(rcx, rcx);

	7485 __ lea(rcx, Operand(rax, rcx, times_8, 0));\
	Mads Ager (chromium) 2010/05/03 09:14:46: Remove '\' at end of line.
	Lasse Reichstein 2010/05/03 10:33:44: Whoops.
	7486 // Check if cache matches: Double value is stored in uint32_t[2] array.

	7487 Label cache_miss;

	7488 __ cmpq(rbx, Operand(rcx, 0));

	7489 __ j(not_equal, &cache_miss);

	7490 // Cache hit!

	7491 __ movq(rax, Operand(rcx, 2 * kIntSize));

	7492 __ fstp(0); // Clear FPU stack.

	7493 __ ret(kPointerSize);

	7494

	7495 __ bind(&cache_miss);

	7496 // Update cache with new value.

	7497 // We are short on registers, so use no_reg as scratch.
	Mads Ager (chromium) 2010/05/03 09:14:46: This is a left over comment from ia32, remove?
	Lasse Reichstein 2010/05/03 10:33:44: Removed.
	7498 // This gives slightly larger code.

	7499 __ AllocateHeapNumber(rax, rdi, &runtime_call_clear_stack);

	7500 GenerateOperation(masm);

	7501 __ movq(Operand(rcx, 0), rbx);

	7502 __ movq(Operand(rcx, 2 * kIntSize), rax);

	7503 __ fstp_d(FieldOperand(rax, HeapNumber::kValueOffset));

	7504 __ ret(kPointerSize);

	7505

	7506 __ bind(&runtime_call_clear_stack);

	7507 __ fstp(0);

	7508 __ bind(&runtime_call);

	7509 __ TailCallExternalReference(ExternalReference(RuntimeFunction()), 1, 1);

	7510 }

	7511

	7512

	7513 Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() {

	7514 switch (type_) {

	7515 // Add more cases when necessary.

	7516 case TranscendentalCache::SIN: return Runtime::kMath_sin;

	7517 case TranscendentalCache::COS: return Runtime::kMath_cos;

	7518 default:

	7519 UNIMPLEMENTED();

	7520 return Runtime::kAbort;

	7521 }

	7522 }

	7523

	7524

	7525 void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {

	7526 // Only free register is edi.
	Mads Ager (chromium) 2010/05/03 09:14:46: rdi
	Lasse Reichstein 2010/05/03 10:33:44: fixed.
	7527 Label done;

	7528 ASSERT(type_ == TranscendentalCache::SIN ||

	7529 type_ == TranscendentalCache::COS);

	7530 // More transcendental types can be added later.

	7531

	7532 // Both fsin and fcos require arguments in the range +/-2^63 and

	7533 // return NaN for infinities and NaN. They can share all code except

	7534 // the actual fsin/fcos operation.

	7535 Label in_range;

	7536 // If argument is outside the range -2^63..2^63, fsin/cos doesn't

	7537 // work. We must reduce it to the appropriate range.

	7538 __ movq(rdi, rbx);

	7539 __ shr(rdi, Immediate(52)); // Exponent

	7540 __ andl(rdi, Immediate(0x7ff));
	Mads Ager (chromium) 2010/05/03 09:14:46: Should we use a couple of named constants for this?
	Lasse Reichstein 2010/05/03 10:33:44: Fixed.
	7541 int supported_exponent_limit =

	7542 (63 + HeapNumber::kExponentBias);
	Mads Ager (chromium) 2010/05/03 09:14:46: This will fit on line above?
	Lasse Reichstein 2010/05/03 10:33:44: Done.
	7543 __ cmpl(rdi, Immediate(supported_exponent_limit));

	7544 __ j(below, &in_range);

	7545 // Check for infinity and NaN. Both return NaN for sin.

	7546 __ cmpl(rdi, Immediate(0x7ff));

	7547 Label non_nan_result;

	7548 __ j(not_equal, &non_nan_result);

	7549 // Input is +/-Infinity or NaN. Result is NaN.

	7550 __ fstp(0); // Clear fpu stack.

	7551 // NaN is represented by 0x7ff8000000000000.

	7552 __ movq(rdi, static_cast<uint64_t>(0x7ff8)<<48, RelocInfo::NONE);
	Mads Ager (chromium) 2010/05/03 09:14:46: We should use a named constant for the nan representation.
	Lasse Reichstein 2010/05/03 10:33:44: Done.
	7553 __ push(rdi);

	7554 __ fld_d(Operand(rsp, 0));

	7555 __ addq(rsp, Immediate(kPointerSize));

	7556 __ jmp(&done);

	7557

	7558 __ bind(&non_nan_result);

	7559

	7560 // Use fpmod to restrict argument to the range +/-2*PI.

	7561 __ movq(rdi, rax); // Save rax before using fnstsw_ax.

	7562 __ fldpi();

	7563 __ fadd(0);

	7564 __ fld(1);

	7565 // FPU Stack: input, 2*pi, input.

	7566 {

	7567 Label no_exceptions;

	7568 __ fwait();

	7569 __ fnstsw_ax();

	7570 // Clear if Illegal Operand or Zero Division exceptions are set.

	7571 __ testl(rax, Immediate(5));

	7572 __ j(zero, &no_exceptions);

	7573 __ fnclex();

	7574 __ bind(&no_exceptions);

	7575 }

	7576

	7577 // Compute st(0) % st(1)

	7578 {

	7579 Label partial_remainder_loop;

	7580 __ bind(&partial_remainder_loop);

	7581 __ fprem1();

	7582 __ fwait();

	7583 __ fnstsw_ax();

	7584 __ testl(rax, Immediate(0x400 /* C2 */));
	Mads Ager (chromium) 2010/05/03 09:14:46: We usually do not put comment in here. This seems like something where we should have a named constant. Or at least move the comment above the testl line and say 'extract C2 field'.
	Lasse Reichstein 2010/05/03 10:33:44: C2 moved to comment.
	7585 // If C2 is set, computation only has partial result. Loop to

	7586 // continue computation.

	7587 __ j(not_zero, &partial_remainder_loop);

	7588 }

	7589 // FPU Stack: input, 2pi, input % 2pi

	7590 __ fstp(2);

	7591 // FPU Stack: input % 2pi, 2pi,

	7592 __ fstp(0);

	7593 // FPU Stack: input % 2*pi

	7594 __ movq(rax, rdi); // Restore eax (allocated HeapNumber pointer).
	Lasse Reichstein 2010/05/03 10:33:44: eax->rax.
	7595

	7596 // FPU Stack: input % 2*pi

	7597 __ bind(&in_range);

	7598 switch (type_) {

	7599 case TranscendentalCache::SIN:

	7600 __ fsin();

	7601 break;

	7602 case TranscendentalCache::COS:

	7603 __ fcos();

	7604 break;

	7605 default:

	7606 UNREACHABLE();

	7607 }

	7608 __ bind(&done);

	7609 }

	7610

	7611

7400 // Get the integer part of a heap number. Surprisingly, all this bit twiddling	7612 // Get the integer part of a heap number. Surprisingly, all this bit twiddling

7401 // is faster than using the built-in instructions on floating point registers.	7613 // is faster than using the built-in instructions on floating point registers.

7402 // Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the	7614 // Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the

7403 // trashed registers.	7615 // trashed registers.

7404 void IntegerConvert(MacroAssembler* masm,	7616 void IntegerConvert(MacroAssembler* masm,

7405 Register source,	7617 Register source,

7406 bool use_sse3,	7618 bool use_sse3,

7407 Label* conversion_failure) {	7619 Label* conversion_failure) {

7408 ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx));	7620 ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx));

7409 Label done, right_exponent, normal_exponent;	7621 Label done, right_exponent, normal_exponent;

(...skipping 3551 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
10961 // Call the function from C++.	11173 // Call the function from C++.

10962 return FUNCTION_CAST<ModuloFunction>(buffer);	11174 return FUNCTION_CAST<ModuloFunction>(buffer);

10963 }	11175 }

10964	11176

10965 #endif	11177 #endif

10966	11178

10967	11179

10968 #undef __	11180 #undef __

10969	11181

10970 } } // namespace v8::internal	11182 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/x64/codegen-x64.h ('k') | src/x64/disasm-x64.cc » ('j') | no next file with comments »