Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(281)

Side by Side Diff: src/x64/codegen-x64.cc

Issue 1860001: X64: Port inline transcendental cache to X64. (Closed)
Patch Set: Created 10 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/x64/codegen-x64.h ('k') | src/x64/disasm-x64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 4499 matching lines...) Expand 10 before | Expand all | Expand 10 after
4510 Load(args->at(i + 1)); 4510 Load(args->at(i + 1));
4511 } 4511 }
4512 Load(args->at(n_args + 1)); // function 4512 Load(args->at(n_args + 1)); // function
4513 Result result = frame_->CallJSFunction(n_args); 4513 Result result = frame_->CallJSFunction(n_args);
4514 frame_->Push(&result); 4514 frame_->Push(&result);
4515 } 4515 }
4516 4516
4517 4517
4518 void CodeGenerator::GenerateMathSin(ZoneList<Expression*>* args) { 4518 void CodeGenerator::GenerateMathSin(ZoneList<Expression*>* args) {
4519 ASSERT_EQ(args->length(), 1); 4519 ASSERT_EQ(args->length(), 1);
4520 // Load the argument on the stack and jump to the runtime.
4521 Load(args->at(0)); 4520 Load(args->at(0));
4522 Result answer = frame_->CallRuntime(Runtime::kMath_sin, 1); 4521 TranscendentalCacheStub stub(TranscendentalCache::SIN);
4523 frame_->Push(&answer); 4522 Result result = frame_->CallStub(&stub, 1);
4523 frame_->Push(&result);
4524 } 4524 }
4525 4525
4526 4526
4527 void CodeGenerator::GenerateMathCos(ZoneList<Expression*>* args) { 4527 void CodeGenerator::GenerateMathCos(ZoneList<Expression*>* args) {
4528 ASSERT_EQ(args->length(), 1); 4528 ASSERT_EQ(args->length(), 1);
4529 // Load the argument on the stack and jump to the runtime.
4530 Load(args->at(0)); 4529 Load(args->at(0));
4531 Result answer = frame_->CallRuntime(Runtime::kMath_cos, 1); 4530 TranscendentalCacheStub stub(TranscendentalCache::COS);
4532 frame_->Push(&answer); 4531 Result result = frame_->CallStub(&stub, 1);
4532 frame_->Push(&result);
4533 } 4533 }
4534 4534
4535 4535
4536 void CodeGenerator::GenerateStringAdd(ZoneList<Expression*>* args) { 4536 void CodeGenerator::GenerateStringAdd(ZoneList<Expression*>* args) {
4537 ASSERT_EQ(2, args->length()); 4537 ASSERT_EQ(2, args->length());
4538 4538
4539 Load(args->at(0)); 4539 Load(args->at(0));
4540 Load(args->at(1)); 4540 Load(args->at(1));
4541 4541
4542 StringAddStub stub(NO_STRING_ADD_FLAGS); 4542 StringAddStub stub(NO_STRING_ADD_FLAGS);
(...skipping 2847 matching lines...) Expand 10 before | Expand all | Expand 10 after
7390 if (answer_object == Heap::undefined_value()) { 7390 if (answer_object == Heap::undefined_value()) {
7391 return false; 7391 return false;
7392 } 7392 }
7393 frame_->Push(Handle<Object>(answer_object)); 7393 frame_->Push(Handle<Object>(answer_object));
7394 return true; 7394 return true;
7395 } 7395 }
7396 7396
7397 7397
7398 // End of CodeGenerator implementation. 7398 // End of CodeGenerator implementation.
7399 7399
7400 void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
7401 // Input on stack:
7402 // rsp[8]: argument (should be number).
7403 // rsp[0]: return address.
7404 // Test that rax is a number.
Mads Ager (chromium) 2010/05/03 09:14:46 eax -> rax Move to after the label declarations?
Lasse Reichstein 2010/05/03 10:33:44 Done.
7405 Label runtime_call;
7406 Label runtime_call_clear_stack;
7407 Label input_not_smi;
7408 Label loaded;
7409 __ movq(rax, Operand(rsp, kPointerSize));
7410 __ JumpIfNotSmi(rax, &input_not_smi);
7411 // Input is a smi. Untag and load it onto the FPU stack.
7412 // Then load the bits of the double into rbx.
7413 ASSERT_EQ(1, kSmiTagSize);
Mads Ager (chromium) 2010/05/03 09:14:46 Does the assert add anything when you are using th
Lasse Reichstein 2010/05/03 10:33:44 Removed.
7414 __ SmiToInteger32(rax, rax);
7415 __ subq(rsp, Immediate(kPointerSize));
7416 __ cvtlsi2sd(xmm1, rax);
7417 __ movsd(Operand(rsp, 0), xmm1);
7418 __ movq(rbx, xmm1);
7419 __ movq(rdx, xmm1);
7420 __ fld_d(Operand(rsp, 0));
7421 __ addq(rsp, Immediate(kPointerSize));
7422 __ jmp(&loaded);
7423
7424 __ bind(&input_not_smi);
7425 // Check if input is a HeapNumber.
7426 __ Move(rbx, Factory::heap_number_map());
7427 __ cmpq(rbx, FieldOperand(rax, HeapObject::kMapOffset));
Mads Ager (chromium) 2010/05/03 09:14:46 Can we use CompareRoot here?
Lasse Reichstein 2010/05/03 10:33:44 We can, but in this case I don't want to. Using th
7428 __ j(not_equal, &runtime_call);
7429 // Input is a HeapNumber. Push it on the FPU stack and load its
7430 // bits into rbx.
7431 __ fld_d(FieldOperand(rax, HeapNumber::kValueOffset));
7432 __ movq(rbx, FieldOperand(rax, HeapNumber::kValueOffset));
7433 __ movq(rdx, rbx);
7434 __ bind(&loaded);
7435 // ST[0] == double value
7436 // rbx = bits of double value.
7437 // rdx = also bits of double value.
7438 // Compute hash (h is 32 bits, bits are 64):
7439 // h = h0 = bits ^ (bits >> 32);
7440 // h ^= h >> 16;
7441 // h ^= h >> 8;
7442 // h = h & (cacheSize - 1);
7443 // or h = (h0 ^ (h0 >> 8) ^ (h0 >> 16) ^ (h0 >> 24)) & (cacheSize - 1)
Mads Ager (chromium) 2010/05/03 09:14:46 add parenthesis (cacheSize - 1)?
Lasse Reichstein 2010/05/03 10:33:44 Done.
7444 __ sar(rdx, Immediate(32));
7445 __ xorl(rdx, rbx);
7446 __ movl(rcx, rdx);
7447 __ movl(rax, rdx);
7448 __ movl(rdi, rdx);
7449 __ sarl(rdx, Immediate(8));
7450 __ sarl(rcx, Immediate(16));
7451 __ sarl(rax, Immediate(24));
7452 __ xorl(rcx, rdx);
7453 __ xorl(rax, rdi);
7454 __ xorl(rcx, rax);
7455 ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));
7456 __ andl(rcx, Immediate(TranscendentalCache::kCacheSize - 1));
7457 // ST[0] == double value.
7458 // rbx = bits of double value.
7459 // rcx = TranscendentalCache::hash(double value).
7460 __ movq(rax, ExternalReference::transcendental_cache_array_address());
7461 // rax points to cache array.
7462 __ movq(rax, Operand(rax, type_ * sizeof(TranscendentalCache::caches_[0])));
7463 // rax points to the cache for the type type_.
7464 // If NULL, the cache hasn't been initialized yet, so go through runtime.
7465 __ testq(rax, rax);
7466 __ j(zero, &runtime_call_clear_stack);
7467 #ifdef DEBUG
7468 // Check that the layout of cache elements match expectations.
7469 { // NOLINT - doesn't like a single brace on a line.
Mads Ager (chromium) 2010/05/03 09:14:46 Will the linter be happy with: { Transcendenta
Lasse Reichstein 2010/05/03 10:33:44 It probably will, but I'd hate it instead. I'd rat
7470 TranscendentalCache::Element test_elem[2];
7471 char* elem_start = reinterpret_cast<char*>(&test_elem[0]);
7472 char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);
7473 char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));
7474 char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));
7475 char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));
7476 // Two uint32_t's and a pointer per element.
7477 CHECK_EQ(16, static_cast<int>(elem2_start - elem_start));
7478 CHECK_EQ(0, static_cast<int>(elem_in0 - elem_start));
7479 CHECK_EQ(kIntSize, static_cast<int>(elem_in1 - elem_start));
7480 CHECK_EQ(2 * kIntSize, static_cast<int>(elem_out - elem_start));
7481 }
7482 #endif
7483 // Find the address of the rcx'th entry in the cache, i.e., &rax[rcx*16].
7484 __ addl(rcx, rcx);
7485 __ lea(rcx, Operand(rax, rcx, times_8, 0));\
Mads Ager (chromium) 2010/05/03 09:14:46 Remove '\' at end of line.
Lasse Reichstein 2010/05/03 10:33:44 Whoops.
7486 // Check if cache matches: Double value is stored in uint32_t[2] array.
7487 Label cache_miss;
7488 __ cmpq(rbx, Operand(rcx, 0));
7489 __ j(not_equal, &cache_miss);
7490 // Cache hit!
7491 __ movq(rax, Operand(rcx, 2 * kIntSize));
7492 __ fstp(0); // Clear FPU stack.
7493 __ ret(kPointerSize);
7494
7495 __ bind(&cache_miss);
7496 // Update cache with new value.
7497 // We are short on registers, so use no_reg as scratch.
Mads Ager (chromium) 2010/05/03 09:14:46 This is a left over comment from ia32, remove?
Lasse Reichstein 2010/05/03 10:33:44 Removed.
7498 // This gives slightly larger code.
7499 __ AllocateHeapNumber(rax, rdi, &runtime_call_clear_stack);
7500 GenerateOperation(masm);
7501 __ movq(Operand(rcx, 0), rbx);
7502 __ movq(Operand(rcx, 2 * kIntSize), rax);
7503 __ fstp_d(FieldOperand(rax, HeapNumber::kValueOffset));
7504 __ ret(kPointerSize);
7505
7506 __ bind(&runtime_call_clear_stack);
7507 __ fstp(0);
7508 __ bind(&runtime_call);
7509 __ TailCallExternalReference(ExternalReference(RuntimeFunction()), 1, 1);
7510 }
7511
7512
7513 Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() {
7514 switch (type_) {
7515 // Add more cases when necessary.
7516 case TranscendentalCache::SIN: return Runtime::kMath_sin;
7517 case TranscendentalCache::COS: return Runtime::kMath_cos;
7518 default:
7519 UNIMPLEMENTED();
7520 return Runtime::kAbort;
7521 }
7522 }
7523
7524
7525 void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {
7526 // Only free register is rdi.
Mads Ager (chromium) 2010/05/03 09:14:46 rdi
Lasse Reichstein 2010/05/03 10:33:44 fixed.
7527 Label done;
7528 ASSERT(type_ == TranscendentalCache::SIN ||
7529 type_ == TranscendentalCache::COS);
7530 // More transcendental types can be added later.
7531
7532 // Both fsin and fcos require arguments in the range +/-2^63 and
7533 // return NaN for infinities and NaN. They can share all code except
7534 // the actual fsin/fcos operation.
7535 Label in_range;
7536 // If argument is outside the range -2^63..2^63, fsin/fcos don't
7537 // work. We must reduce it to the appropriate range.
7538 __ movq(rdi, rbx);
7539 __ shr(rdi, Immediate(52)); // Exponent
7540 __ andl(rdi, Immediate(0x7ff));
Mads Ager (chromium) 2010/05/03 09:14:46 Should we use a couple of named constants for this
Lasse Reichstein 2010/05/03 10:33:44 Fixed.
7541 int supported_exponent_limit =
7542 (63 + HeapNumber::kExponentBias);
Mads Ager (chromium) 2010/05/03 09:14:46 This will fit on line above?
Lasse Reichstein 2010/05/03 10:33:44 Done.
7543 __ cmpl(rdi, Immediate(supported_exponent_limit));
7544 __ j(below, &in_range);
7545 // Check for infinity and NaN. Both return NaN for sin and cos.
7546 __ cmpl(rdi, Immediate(0x7ff));
7547 Label non_nan_result;
7548 __ j(not_equal, &non_nan_result);
7549 // Input is +/-Infinity or NaN. Result is NaN.
7550 __ fstp(0); // Clear fpu stack.
7551 // NaN is represented by 0x7ff8000000000000.
7552 __ movq(rdi, static_cast<uint64_t>(0x7ff8)<<48, RelocInfo::NONE);
Mads Ager (chromium) 2010/05/03 09:14:46 We should use a named constant for the nan represe
Lasse Reichstein 2010/05/03 10:33:44 Done.
7553 __ push(rdi);
7554 __ fld_d(Operand(rsp, 0));
7555 __ addq(rsp, Immediate(kPointerSize));
7556 __ jmp(&done);
7557
7558 __ bind(&non_nan_result);
7559
7560 // Use fprem1 to restrict argument to the range +/-2*PI.
7561 __ movq(rdi, rax); // Save rax before using fnstsw_ax.
7562 __ fldpi();
7563 __ fadd(0);
7564 __ fld(1);
7565 // FPU Stack: input, 2*pi, input.
7566 {
7567 Label no_exceptions;
7568 __ fwait();
7569 __ fnstsw_ax();
7570 // Clear if Illegal Operand or Zero Division exceptions are set.
7571 __ testl(rax, Immediate(5));
7572 __ j(zero, &no_exceptions);
7573 __ fnclex();
7574 __ bind(&no_exceptions);
7575 }
7576
7577 // Compute st(0) % st(1)
7578 {
7579 Label partial_remainder_loop;
7580 __ bind(&partial_remainder_loop);
7581 __ fprem1();
7582 __ fwait();
7583 __ fnstsw_ax();
7584 __ testl(rax, Immediate(0x400 /* C2 */));
Mads Ager (chromium) 2010/05/03 09:14:46 We usually do not put comment in here. This seems
Lasse Reichstein 2010/05/03 10:33:44 C2 moved to comment.
7585 // If C2 is set, computation only has partial result. Loop to
7586 // continue computation.
7587 __ j(not_zero, &partial_remainder_loop);
7588 }
7589 // FPU Stack: input, 2*pi, input % 2*pi
7590 __ fstp(2);
7591 // FPU Stack: input % 2*pi, 2*pi,
7592 __ fstp(0);
7593 // FPU Stack: input % 2*pi
7594 __ movq(rax, rdi); // Restore rax (allocated HeapNumber pointer).
Lasse Reichstein 2010/05/03 10:33:44 eax->rax.
7595
7596 // FPU Stack: input % 2*pi
7597 __ bind(&in_range);
7598 switch (type_) {
7599 case TranscendentalCache::SIN:
7600 __ fsin();
7601 break;
7602 case TranscendentalCache::COS:
7603 __ fcos();
7604 break;
7605 default:
7606 UNREACHABLE();
7607 }
7608 __ bind(&done);
7609 }
7610
7611
7400 // Get the integer part of a heap number. Surprisingly, all this bit twiddling 7612 // Get the integer part of a heap number. Surprisingly, all this bit twiddling
7401 // is faster than using the built-in instructions on floating point registers. 7613 // is faster than using the built-in instructions on floating point registers.
7402 // Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the 7614 // Trashes rdi and rbx. Dest is rcx. Source cannot be rcx or one of the
7403 // trashed registers. 7615 // trashed registers.
7404 void IntegerConvert(MacroAssembler* masm, 7616 void IntegerConvert(MacroAssembler* masm,
7405 Register source, 7617 Register source,
7406 bool use_sse3, 7618 bool use_sse3,
7407 Label* conversion_failure) { 7619 Label* conversion_failure) {
7408 ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx)); 7620 ASSERT(!source.is(rcx) && !source.is(rdi) && !source.is(rbx));
7409 Label done, right_exponent, normal_exponent; 7621 Label done, right_exponent, normal_exponent;
(...skipping 3551 matching lines...) Expand 10 before | Expand all | Expand 10 after
10961 // Call the function from C++. 11173 // Call the function from C++.
10962 return FUNCTION_CAST<ModuloFunction>(buffer); 11174 return FUNCTION_CAST<ModuloFunction>(buffer);
10963 } 11175 }
10964 11176
10965 #endif 11177 #endif
10966 11178
10967 11179
10968 #undef __ 11180 #undef __
10969 11181
10970 } } // namespace v8::internal 11182 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/x64/codegen-x64.h ('k') | src/x64/disasm-x64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698