 Chromium Code Reviews
 Chromium Code Reviews Issue 18041003:
  Implement X87 stack tracking and x87 multiplication  (Closed) 
  Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
    
  
    Issue 18041003:
  Implement X87 stack tracking and x87 multiplication  (Closed) 
  Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge| Index: src/ia32/lithium-codegen-ia32.cc | 
| diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc | 
| index 2ddeb27148000680f0b3f2e7c7c4de79f5782503..d80421a546440ee6492f65d82615cda5caa0aba7 100644 | 
| --- a/src/ia32/lithium-codegen-ia32.cc | 
| +++ b/src/ia32/lithium-codegen-ia32.cc | 
| @@ -353,7 +353,6 @@ bool LCodeGen::GenerateBody() { | 
| instr->CompileToNative(this); | 
| if (!CpuFeatures::IsSupported(SSE2)) { | 
| - ASSERT(!instr->HasDoubleRegisterResult() || x87_stack_depth_ == 1); | 
| if (FLAG_debug_code && FLAG_enable_slow_asserts) { | 
| __ VerifyX87StackDepth(x87_stack_depth_); | 
| } | 
| @@ -501,57 +500,151 @@ Register LCodeGen::ToRegister(int index) const { | 
| } | 
| +X87Register LCodeGen::ToX87Register(int index) const { | 
| + return X87Register::FromAllocationIndex(index); | 
| +} | 
| + | 
| + | 
| XMMRegister LCodeGen::ToDoubleRegister(int index) const { | 
| return XMMRegister::FromAllocationIndex(index); | 
| } | 
| -bool LCodeGen::IsX87TopOfStack(LOperand* op) const { | 
| - return op->IsDoubleRegister(); | 
| +void LCodeGen::X87LoadForUsage(X87Register reg) { | 
| + ASSERT(X87StackContains(reg)); | 
| + X87Fxch(reg); | 
| + x87_stack_depth_--; | 
| } | 
| -void LCodeGen::ReadX87Operand(Operand dst) { | 
| - ASSERT(x87_stack_depth_ == 1); | 
| - __ fst_d(dst); | 
| +void LCodeGen::X87Fxch(X87Register reg, int other_slot) { | 
| + int i = X87ArrayIndex(reg); | 
| + int st = x87_st2idx(i); | 
| + if (st != other_slot) { | 
| + int other_i = x87_st2idx(other_slot); | 
| + X87Register other = x87_stack_[other_i]; | 
| + x87_stack_[other_i] = reg; | 
| + x87_stack_[i] = other; | 
| + if (st == 0) { | 
| + __ fxch(other_slot); | 
| + } else if (other_slot == 0) { | 
| + __ fxch(st); | 
| + } else { | 
| + __ fxch(st); | 
| + __ fxch(other_slot); | 
| + __ fxch(st); | 
| + } | 
| + } | 
| } | 
| -void LCodeGen::PushX87DoubleOperand(Operand src) { | 
| - ASSERT(x87_stack_depth_ == 0); | 
| - x87_stack_depth_++; | 
| - __ fld_d(src); | 
| +int LCodeGen::x87_st2idx(int pos) { | 
| + return x87_stack_depth_ - pos - 1; | 
| } | 
| -void LCodeGen::PushX87FloatOperand(Operand src) { | 
| - ASSERT(x87_stack_depth_ == 0); | 
| - x87_stack_depth_++; | 
| - __ fld_s(src); | 
| +int LCodeGen::X87ArrayIndex(X87Register reg) { | 
| + for (int i = 0; i < x87_stack_depth_; i++) { | 
| + if (x87_stack_[i].is(reg)) return i; | 
| + } | 
| + UNREACHABLE(); | 
| + return -1; | 
| } | 
| -void LCodeGen::PopX87() { | 
| - ASSERT(x87_stack_depth_ == 1); | 
| +bool LCodeGen::X87StackContains(X87Register reg) { | 
| + for (int i = 0; i < x87_stack_depth_; i++) { | 
| + if (x87_stack_[i].is(reg)) return true; | 
| + } | 
| + return false; | 
| +} | 
| + | 
| + | 
| +void LCodeGen::X87Free(X87Register reg) { | 
| + ASSERT(x87_stack_depth_ > 0); | 
| + int i = X87ArrayIndex(reg); | 
| + int st = x87_st2idx(i); | 
| + if (st > 0) x87_stack_[i] = x87_stack_[x87_st2idx(0)]; | 
| 
mvstanton
2013/07/04 12:30:55
Could you use { } around the assignment? I'm usual
 | 
| x87_stack_depth_--; | 
| - __ fstp(0); | 
| + __ fstp(st); | 
| +} | 
| + | 
| + | 
| +void LCodeGen::X87Mov(X87Register dst, Operand src, X87OperandType opts) { | 
| + if (X87StackContains(dst)) { | 
| + X87Fxch(dst); | 
| + __ fstp(0); | 
| + } else { | 
| + x87_stack_[x87_stack_depth_] = dst; | 
| + x87_stack_depth_++; | 
| + } | 
| + X87Fld(src, opts); | 
| } | 
| -void LCodeGen::CurrentInstructionReturnsX87Result() { | 
| - ASSERT(x87_stack_depth_ <= 1); | 
| - if (x87_stack_depth_ == 0) { | 
| - x87_stack_depth_ = 1; | 
| +void LCodeGen::X87Fld(Operand src, X87OperandType opts) { | 
| + if (opts == kX87DoubleOperand) { | 
| + __ fld_d(src); | 
| + } else if (opts == kX87FloatOperand) { | 
| + __ fld_s(src); | 
| + } else if (opts == kX87IntOperand) { | 
| + __ fild_s(src); | 
| + } else { | 
| + UNREACHABLE(); | 
| } | 
| } | 
| +void LCodeGen::X87Mov(Operand dst, X87Register src) { | 
| + X87Fxch(src); | 
| + __ fst_d(dst); | 
| +} | 
| + | 
| + | 
| +void LCodeGen::X87PrepareToWrite(X87Register reg) { | 
| + if (X87StackContains(reg)) { | 
| + X87Free(reg); | 
| + } | 
| + // Mark this register as the next register to write to | 
| + x87_stack_[x87_stack_depth_] = reg; | 
| +} | 
| + | 
| + | 
| +void LCodeGen::X87CommitWrite(X87Register reg) { | 
| + // Assert the reg is prepared to write, but not on the virtual stack yet | 
| + ASSERT(!X87StackContains(reg) && x87_stack_[x87_stack_depth_].is(reg)); | 
| + x87_stack_depth_++; | 
| +} | 
| + | 
| + | 
| +void LCodeGen::X87PrepareBinaryOp( | 
| + X87Register left, X87Register right, X87Register result) { | 
| + // You need to use DefineSameAsFirst for x87 instructions | 
| + ASSERT(result.is(left)); | 
| + X87Fxch(right, 1); | 
| + X87Fxch(left); | 
| +} | 
| + | 
| + | 
| void LCodeGen::FlushX87StackIfNecessary(LInstruction* instr) { | 
| - if (x87_stack_depth_ > 0) { | 
| - if ((instr->ClobbersDoubleRegisters() || | 
| - instr->HasDoubleRegisterResult()) && | 
| - !instr->HasDoubleRegisterInput()) { | 
| - PopX87(); | 
| + if (x87_stack_depth_ > 0 && instr->ClobbersDoubleRegisters()) { | 
| + bool double_inputs = instr->HasDoubleRegisterInput(); | 
| + | 
| + // Flush stack from tos down, since PopX87() will mess with tos | 
| 
mvstanton
2013/07/04 12:30:55
Update the comment to reflect that PopX87() is gon
 | 
| + for (int i = x87_stack_depth_-1; i >= 0; i--) { | 
| + X87Register reg = x87_stack_[i]; | 
| + // Skip input registers when flushing | 
| + if (double_inputs && instr->IsDoubleInput(reg, this)) { | 
| + continue; | 
| + } | 
| + X87Free(reg); | 
| + if (i < x87_stack_depth_-1) i++; | 
| + } | 
| + } | 
| + if (instr->IsReturn()) { | 
| + while (x87_stack_depth_ > 0) { | 
| + __ fstp(0); | 
| + x87_stack_depth_--; | 
| } | 
| } | 
| } | 
| @@ -563,6 +656,12 @@ Register LCodeGen::ToRegister(LOperand* op) const { | 
| } | 
| +X87Register LCodeGen::ToX87Register(LOperand* op) const { | 
| + ASSERT(op->IsDoubleRegister()); | 
| + return ToX87Register(op->index()); | 
| +} | 
| + | 
| + | 
| XMMRegister LCodeGen::ToDoubleRegister(LOperand* op) const { | 
| ASSERT(op->IsDoubleRegister()); | 
| return ToDoubleRegister(op->index()); | 
| @@ -835,8 +934,6 @@ void LCodeGen::DeoptimizeIf(Condition cc, | 
| Deoptimizer::BailoutType bailout_type) { | 
| RegisterEnvironmentForDeoptimization(environment, Safepoint::kNoLazyDeopt); | 
| ASSERT(environment->HasBeenRegistered()); | 
| - // It's an error to deoptimize with the x87 fp stack in use. | 
| - ASSERT(x87_stack_depth_ == 0); | 
| int id = environment->deoptimization_index(); | 
| ASSERT(info()->IsOptimizing() || info()->IsStub()); | 
| Address entry = | 
| @@ -874,11 +971,16 @@ void LCodeGen::DeoptimizeIf(Condition cc, | 
| __ popfd(); | 
| } | 
| + if (x87_stack_depth_ > 0) { | 
| + Label done; | 
| 
mvstanton
2013/07/04 12:30:55
Could you explain what is happening here with a co
 | 
| + if (cc != no_condition) __ j(NegateCondition(cc), &done, Label::kNear); | 
| + for (int i = 0; i < x87_stack_depth_; i++) __ fstp(0); | 
| + __ bind(&done); | 
| + } | 
| + | 
| if (FLAG_trap_on_deopt && info()->IsOptimizing()) { | 
| Label done; | 
| - if (cc != no_condition) { | 
| - __ j(NegateCondition(cc), &done, Label::kNear); | 
| - } | 
| + if (cc != no_condition) __ j(NegateCondition(cc), &done, Label::kNear); | 
| __ int3(); | 
| __ bind(&done); | 
| } | 
| @@ -1721,11 +1823,10 @@ void LCodeGen::DoConstantD(LConstantD* instr) { | 
| int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); | 
| if (!CpuFeatures::IsSafeForSnapshot(SSE2)) { | 
| - __ push(Immediate(lower)); | 
| __ push(Immediate(upper)); | 
| - PushX87DoubleOperand(Operand(esp, 0)); | 
| + __ push(Immediate(lower)); | 
| + X87Mov(ToX87Register(instr->result()), Operand(esp, 0)); | 
| __ add(Operand(esp), Immediate(kDoubleSize)); | 
| - CurrentInstructionReturnsX87Result(); | 
| } else { | 
| CpuFeatureScope scope1(masm(), SSE2); | 
| ASSERT(instr->result()->IsDoubleRegister()); | 
| @@ -1990,62 +2091,67 @@ void LCodeGen::DoMathMinMax(LMathMinMax* instr) { | 
| void LCodeGen::DoArithmeticD(LArithmeticD* instr) { | 
| - CpuFeatureScope scope(masm(), SSE2); | 
| - XMMRegister left = ToDoubleRegister(instr->left()); | 
| - XMMRegister right = ToDoubleRegister(instr->right()); | 
| - XMMRegister result = ToDoubleRegister(instr->result()); | 
| - // Modulo uses a fixed result register. | 
| - ASSERT(instr->op() == Token::MOD || left.is(result)); | 
| - switch (instr->op()) { | 
| - case Token::ADD: | 
| - __ addsd(left, right); | 
| - break; | 
| - case Token::SUB: | 
| - __ subsd(left, right); | 
| - break; | 
| - case Token::MUL: | 
| - __ mulsd(left, right); | 
| - break; | 
| - case Token::DIV: | 
| - __ divsd(left, right); | 
| - // Don't delete this mov. It may improve performance on some CPUs, | 
| - // when there is a mulsd depending on the result | 
| - __ movaps(left, left); | 
| - break; | 
| - case Token::MOD: { | 
| - // Pass two doubles as arguments on the stack. | 
| - __ PrepareCallCFunction(4, eax); | 
| - __ movdbl(Operand(esp, 0 * kDoubleSize), left); | 
| - __ movdbl(Operand(esp, 1 * kDoubleSize), right); | 
| - __ CallCFunction( | 
| - ExternalReference::double_fp_operation(Token::MOD, isolate()), | 
| - 4); | 
| - | 
| - // Return value is in st(0) on ia32. | 
| - // Store it into the (fixed) result register. | 
| - __ sub(Operand(esp), Immediate(kDoubleSize)); | 
| - __ fstp_d(Operand(esp, 0)); | 
| - __ movdbl(result, Operand(esp, 0)); | 
| - __ add(Operand(esp), Immediate(kDoubleSize)); | 
| - break; | 
| + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { | 
| + CpuFeatureScope scope(masm(), SSE2); | 
| + XMMRegister left = ToDoubleRegister(instr->left()); | 
| + XMMRegister right = ToDoubleRegister(instr->right()); | 
| + XMMRegister result = ToDoubleRegister(instr->result()); | 
| + // Modulo uses a fixed result register. | 
| + ASSERT(instr->op() == Token::MOD || left.is(result)); | 
| + switch (instr->op()) { | 
| + case Token::ADD: | 
| + __ addsd(left, right); | 
| + break; | 
| + case Token::SUB: | 
| + __ subsd(left, right); | 
| + break; | 
| + case Token::MUL: | 
| + __ mulsd(left, right); | 
| + break; | 
| + case Token::DIV: | 
| + __ divsd(left, right); | 
| + // Don't delete this mov. It may improve performance on some CPUs, | 
| + // when there is a mulsd depending on the result | 
| + __ movaps(left, left); | 
| + break; | 
| + case Token::MOD: { | 
| + // Pass two doubles as arguments on the stack. | 
| + __ PrepareCallCFunction(4, eax); | 
| + __ movdbl(Operand(esp, 0 * kDoubleSize), left); | 
| + __ movdbl(Operand(esp, 1 * kDoubleSize), right); | 
| + __ CallCFunction( | 
| + ExternalReference::double_fp_operation(Token::MOD, isolate()), | 
| + 4); | 
| + | 
| + // Return value is in st(0) on ia32. | 
| + // Store it into the (fixed) result register. | 
| + __ sub(Operand(esp), Immediate(kDoubleSize)); | 
| + __ fstp_d(Operand(esp, 0)); | 
| + __ movdbl(result, Operand(esp, 0)); | 
| + __ add(Operand(esp), Immediate(kDoubleSize)); | 
| + break; | 
| + } | 
| + default: | 
| + UNREACHABLE(); | 
| + break; | 
| + } | 
| + } else { | 
| + X87Register left = ToX87Register(instr->left()); | 
| + X87Register right = ToX87Register(instr->right()); | 
| + X87Register result = ToX87Register(instr->result()); | 
| + X87PrepareBinaryOp(left, right, result); | 
| + switch (instr->op()) { | 
| + case Token::MUL: | 
| + __ fmul_i(1); | 
| + break; | 
| + default: | 
| + UNREACHABLE(); | 
| + break; | 
| } | 
| - default: | 
| - UNREACHABLE(); | 
| - break; | 
| } | 
| } | 
| -void LCodeGen::DoNegateNoSSE2D(LNegateNoSSE2D* instr) { | 
| - __ push(Immediate(-1)); | 
| - __ fild_s(Operand(esp, 0)); | 
| - __ add(esp, Immediate(kPointerSize)); | 
| - __ fmulp(); | 
| - CurrentInstructionReturnsX87Result(); | 
| -} | 
| - | 
| - | 
| - | 
| void LCodeGen::DoArithmeticT(LArithmeticT* instr) { | 
| ASSERT(ToRegister(instr->context()).is(esi)); | 
| ASSERT(ToRegister(instr->left()).is(edx)); | 
| @@ -2963,8 +3069,7 @@ void LCodeGen::DoLoadNamedField(LLoadNamedField* instr) { | 
| XMMRegister result = ToDoubleRegister(instr->result()); | 
| __ movdbl(result, FieldOperand(object, offset)); | 
| } else { | 
| - PushX87DoubleOperand(FieldOperand(object, offset)); | 
| - CurrentInstructionReturnsX87Result(); | 
| + X87Mov(ToX87Register(instr->result()), FieldOperand(object, offset)); | 
| } | 
| return; | 
| } | 
| @@ -3209,16 +3314,14 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { | 
| __ movss(result, operand); | 
| __ cvtss2sd(result, result); | 
| } else { | 
| - PushX87FloatOperand(operand); | 
| - CurrentInstructionReturnsX87Result(); | 
| + X87Mov(ToX87Register(instr->result()), operand, kX87FloatOperand); | 
| } | 
| } else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { | 
| if (CpuFeatures::IsSupported(SSE2)) { | 
| CpuFeatureScope scope(masm(), SSE2); | 
| __ movdbl(ToDoubleRegister(instr->result()), operand); | 
| } else { | 
| - PushX87DoubleOperand(operand); | 
| - CurrentInstructionReturnsX87Result(); | 
| + X87Mov(ToX87Register(instr->result()), operand); | 
| } | 
| } else { | 
| Register result(ToRegister(instr->result())); | 
| @@ -3289,8 +3392,7 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { | 
| XMMRegister result = ToDoubleRegister(instr->result()); | 
| __ movdbl(result, double_load_operand); | 
| } else { | 
| - PushX87DoubleOperand(double_load_operand); | 
| - CurrentInstructionReturnsX87Result(); | 
| + X87Mov(ToX87Register(instr->result()), double_load_operand); | 
| } | 
| } | 
| @@ -4961,17 +5063,22 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { | 
| convert_hole = load->UsesMustHandleHole(); | 
| } | 
| + bool use_sse2 = CpuFeatures::IsSupported(SSE2); | 
| + if (!use_sse2) { | 
| + // Put the value to the top of stack | 
| + X87Register src = ToX87Register(instr->value()); | 
| + X87LoadForUsage(src); | 
| + } | 
| + | 
| Label no_special_nan_handling; | 
| Label done; | 
| if (convert_hole) { | 
| - bool use_sse2 = CpuFeatures::IsSupported(SSE2); | 
| if (use_sse2) { | 
| CpuFeatureScope scope(masm(), SSE2); | 
| XMMRegister input_reg = ToDoubleRegister(instr->value()); | 
| __ ucomisd(input_reg, input_reg); | 
| } else { | 
| __ fld(0); | 
| - __ fld(0); | 
| __ FCmp(); | 
| } | 
| @@ -5025,6 +5132,10 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { | 
| } else { | 
| __ fst_d(FieldOperand(reg, HeapNumber::kValueOffset)); | 
| } | 
| + if (!use_sse2) { | 
| + // clean up the stack | 
| + __ fstp(0); | 
| + } | 
| __ bind(&done); | 
| } | 
| @@ -5074,12 +5185,14 @@ void LCodeGen::DoSmiUntag(LSmiUntag* instr) { | 
| void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, | 
| Register temp_reg, | 
| + X87Register res_reg, | 
| bool allow_undefined_as_nan, | 
| bool deoptimize_on_minus_zero, | 
| LEnvironment* env, | 
| NumberUntagDMode mode) { | 
| Label load_smi, done; | 
| + X87PrepareToWrite(res_reg); | 
| STATIC_ASSERT(NUMBER_CANDIDATE_IS_ANY_TAGGED_CONVERT_HOLE > | 
| NUMBER_CANDIDATE_IS_ANY_TAGGED); | 
| if (mode >= NUMBER_CANDIDATE_IS_ANY_TAGGED) { | 
| @@ -5140,6 +5253,7 @@ void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, | 
| __ pop(input_reg); | 
| __ SmiTag(input_reg); // Retag smi. | 
| __ bind(&done); | 
| + X87CommitWrite(res_reg); | 
| } | 
| @@ -5521,11 +5635,11 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { | 
| } else { | 
| EmitNumberUntagDNoSSE2(input_reg, | 
| temp_reg, | 
| + ToX87Register(instr->result()), | 
| instr->hydrogen()->allow_undefined_as_nan(), | 
| deoptimize_on_minus_zero, | 
| instr->environment(), | 
| mode); | 
| - CurrentInstructionReturnsX87Result(); | 
| } | 
| } |