Index: src/ia32/lithium-codegen-ia32.cc |
diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc |
index c0c1079606781b9401dc2292173d0e6cfb100141..de37ce3bbff18917c7558ed769eb6cdc8891f697 100644 |
--- a/src/ia32/lithium-codegen-ia32.cc |
+++ b/src/ia32/lithium-codegen-ia32.cc |
@@ -366,7 +366,20 @@ bool LCodeGen::GenerateBody() { |
Comment(";;; @%d: %s.", current_instruction_, instr->Mnemonic()); |
} |
} |
+ |
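+ // Without SSE2, doubles live on the x87 FPU stack; drop a value the |
+ // current instruction is not going to consume before compiling it. |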
+ if (!CpuFeatures::IsSupported(SSE2)) { |
+ FlushX87StackIfNecessary(instr); |
+ } |
+ |
instr->CompileToNative(this); |
+ |
+ if (!CpuFeatures::IsSupported(SSE2)) { |
+ ASSERT(!instr->HasDoubleRegisterResult() || x87_stack_depth_ == 1); |
+ |
+ if (FLAG_debug_code && FLAG_enable_slow_asserts) { |
+ __ VerifyX87StackDepth(x87_stack_depth_); |
+ } |
+ } |
} |
} |
EnsureSpaceForLazyDeopt(); |
@@ -521,6 +534,52 @@ bool LCodeGen::IsX87TopOfStack(LOperand* op) const { |
} |
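+ |
+// When SSE2 is unavailable, double values are kept on the x87 FPU stack. |
+// The code generator tracks at most one such value at a time through |
+// x87_stack_depth_; the helpers below keep that counter in sync with the |
+// actual FPU stack. |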
+void LCodeGen::ReadX87Operand(Operand dst) { |
+ ASSERT(x87_stack_depth_ == 1); |
+ __ fst_d(dst); |
+} |
+ |
+ |
+void LCodeGen::PushX87DoubleOperand(Operand src) { |
+ ASSERT(x87_stack_depth_ == 0); |
+ x87_stack_depth_++; |
+ __ fld_d(src); |
+} |
+ |
+ |
+void LCodeGen::PushX87FloatOperand(Operand src) { |
+ ASSERT(x87_stack_depth_ == 0); |
+ x87_stack_depth_++; |
+ __ fld_s(src); |
+} |
+ |
+ |
+void LCodeGen::PopX87() { |
+ ASSERT(x87_stack_depth_ == 1); |
+ x87_stack_depth_--; |
+ __ fstp(0); |
+} |
+ |
+ |
+void LCodeGen::CurrentInstructionReturnsX87Result() { |
+ ASSERT(x87_stack_depth_ <= 1); |
+ if (x87_stack_depth_ == 0) { |
+ x87_stack_depth_ = 1; |
+ } |
+} |
+ |
+ |
+void LCodeGen::FlushX87StackIfNecessary(LInstruction* instr) { |
+ if (x87_stack_depth_ > 0) { |
+ if ((instr->ClobbersDoubleRegisters() || |
+ instr->HasDoubleRegisterResult()) && |
+ !instr->HasDoubleRegisterInput()) { |
+ PopX87(); |
+ } |
+ } |
+} |
+ |
+ |
Register LCodeGen::ToRegister(LOperand* op) const { |
ASSERT(op->IsRegister()); |
return ToRegister(op->index()); |
@@ -846,6 +905,8 @@ void LCodeGen::RegisterEnvironmentForDeoptimization( |
void LCodeGen::DeoptimizeIf(Condition cc, LEnvironment* environment) { |
RegisterEnvironmentForDeoptimization(environment, Safepoint::kNoLazyDeopt); |
ASSERT(environment->HasBeenRegistered()); |
+ // It's an error to deoptimize with the x87 fp stack in use. |
+ ASSERT(x87_stack_depth_ == 0); |
int id = environment->deoptimization_index(); |
ASSERT(info()->IsOptimizing() || info()->IsStub()); |
Deoptimizer::BailoutType bailout_type = info()->IsStub() |
@@ -1689,40 +1750,46 @@ void LCodeGen::DoConstantI(LConstantI* instr) { |
void LCodeGen::DoConstantD(LConstantD* instr) { |
- ASSERT(instr->result()->IsDoubleRegister()); |
- XMMRegister res = ToDoubleRegister(instr->result()); |
double v = instr->value(); |
- // Use xor to produce +0.0 in a fast and compact way, but avoid to |
- // do so if the constant is -0.0. |
- if (BitCast<uint64_t, double>(v) == 0) { |
- __ xorps(res, res); |
+ uint64_t int_val = BitCast<uint64_t, double>(v); |
+ int32_t lower = static_cast<int32_t>(int_val); |
+ int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
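+ // For example, 1.0 has the bit pattern 0x3FF0000000000000, giving |
+ // upper == 0x3FF00000 and lower == 0x00000000. |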
+ |
+ if (!CpuFeatures::IsSafeForSnapshot(SSE2)) { |
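+ // Materialize the double on the CPU stack (lower word at the lower |
+ // address) and load it from there onto the x87 stack. |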
+ __ push(Immediate(upper)); |
+ __ push(Immediate(lower)); |
+ PushX87DoubleOperand(Operand(esp, 0)); |
+ __ add(Operand(esp), Immediate(kDoubleSize)); |
+ CurrentInstructionReturnsX87Result(); |
} else { |
- Register temp = ToRegister(instr->temp()); |
- uint64_t int_val = BitCast<uint64_t, double>(v); |
- int32_t lower = static_cast<int32_t>(int_val); |
- int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
- if (CpuFeatures::IsSupported(SSE4_1)) { |
- CpuFeatureScope scope1(masm(), SSE2); |
- CpuFeatureScope scope2(masm(), SSE4_1); |
- if (lower != 0) { |
- __ Set(temp, Immediate(lower)); |
- __ movd(res, Operand(temp)); |
- __ Set(temp, Immediate(upper)); |
- __ pinsrd(res, Operand(temp), 1); |
+ CpuFeatureScope scope1(masm(), SSE2); |
+ ASSERT(instr->result()->IsDoubleRegister()); |
+ XMMRegister res = ToDoubleRegister(instr->result()); |
+ if (int_val == 0) { |
+ __ xorps(res, res); |
+ } else { |
+ Register temp = ToRegister(instr->temp()); |
+ if (CpuFeatures::IsSupported(SSE4_1)) { |
+ CpuFeatureScope scope2(masm(), SSE4_1); |
+ if (lower != 0) { |
+ __ Set(temp, Immediate(lower)); |
+ __ movd(res, Operand(temp)); |
+ __ Set(temp, Immediate(upper)); |
+ __ pinsrd(res, Operand(temp), 1); |
+ } else { |
+ __ xorps(res, res); |
+ __ Set(temp, Immediate(upper)); |
+ __ pinsrd(res, Operand(temp), 1); |
+ } |
} else { |
- __ xorps(res, res); |
__ Set(temp, Immediate(upper)); |
- __ pinsrd(res, Operand(temp), 1); |
- } |
- } else { |
- CpuFeatureScope scope(masm(), SSE2); |
- __ Set(temp, Immediate(upper)); |
- __ movd(res, Operand(temp)); |
- __ psllq(res, 32); |
- if (lower != 0) { |
- __ Set(temp, Immediate(lower)); |
- __ movd(xmm0, Operand(temp)); |
- __ por(res, xmm0); |
+ __ movd(res, Operand(temp)); |
+ __ psllq(res, 32); |
+ if (lower != 0) { |
+ __ Set(temp, Immediate(lower)); |
+ __ movd(xmm0, Operand(temp)); |
+ __ por(res, xmm0); |
+ } |
} |
} |
} |
@@ -3158,16 +3225,16 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { |
__ movss(result, operand); |
__ cvtss2sd(result, result); |
} else { |
- __ fld_s(operand); |
- HandleX87FPReturnValue(instr); |
+ PushX87FloatOperand(operand); |
+ CurrentInstructionReturnsX87Result(); |
} |
} else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { |
if (CpuFeatures::IsSupported(SSE2)) { |
CpuFeatureScope scope(masm(), SSE2); |
__ movdbl(ToDoubleRegister(instr->result()), operand); |
} else { |
- __ fld_d(operand); |
- HandleX87FPReturnValue(instr); |
+ PushX87DoubleOperand(operand); |
+ CurrentInstructionReturnsX87Result(); |
} |
} else { |
Register result(ToRegister(instr->result())); |
@@ -3212,29 +3279,6 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { |
} |
-void LCodeGen::HandleX87FPReturnValue(LInstruction* instr) { |
- if (IsX87TopOfStack(instr->result())) { |
- // Return value is already on stack. If the value has no uses, then |
- // pop it off the FP stack. Otherwise, make sure that there are enough |
- // copies of the value on the stack to feed all of the usages, e.g. |
- // when the following instruction uses the return value in multiple |
- // inputs. |
- int count = instr->hydrogen_value()->UseCount(); |
- if (count == 0) { |
- __ fstp(0); |
- } else { |
- count--; |
- ASSERT(count <= 7); |
- while (count-- > 0) { |
- __ fld(0); |
- } |
- } |
- } else { |
- __ fstp_d(ToOperand(instr->result())); |
- } |
-} |
- |
- |
void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { |
if (instr->hydrogen()->RequiresHoleCheck()) { |
int offset = FixedDoubleArray::kHeaderSize - kHeapObjectTag + |
@@ -3261,8 +3305,8 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { |
XMMRegister result = ToDoubleRegister(instr->result()); |
__ movdbl(result, double_load_operand); |
} else { |
- __ fld_d(double_load_operand); |
- HandleX87FPReturnValue(instr); |
+ PushX87DoubleOperand(double_load_operand); |
+ CurrentInstructionReturnsX87Result(); |
} |
} |
@@ -4311,12 +4355,21 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { |
0, |
instr->additional_index())); |
if (elements_kind == EXTERNAL_FLOAT_ELEMENTS) { |
- CpuFeatureScope scope(masm(), SSE2); |
- __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); |
- __ movss(operand, xmm0); |
+ if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
+ CpuFeatureScope scope(masm(), SSE2); |
+ __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); |
+ __ movss(operand, xmm0); |
+ } else { |
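+ // Duplicate the double on top of the x87 stack and store the copy as |
+ // a single-precision float, keeping the original value on the stack. |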
+ __ fld(0); |
+ __ fstp_s(operand); |
+ } |
} else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { |
- CpuFeatureScope scope(masm(), SSE2); |
- __ movdbl(operand, ToDoubleRegister(instr->value())); |
+ if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
+ CpuFeatureScope scope(masm(), SSE2); |
+ __ movdbl(operand, ToDoubleRegister(instr->value())); |
+ } else { |
+ __ fst_d(operand); |
+ } |
} else { |
Register value = ToRegister(instr->value()); |
switch (elements_kind) { |
@@ -4351,21 +4404,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { |
void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { |
- CpuFeatureScope scope(masm(), SSE2); |
- XMMRegister value = ToDoubleRegister(instr->value()); |
- |
- if (instr->NeedsCanonicalization()) { |
- Label have_value; |
- |
- __ ucomisd(value, value); |
- __ j(parity_odd, &have_value); // NaN. |
- |
- ExternalReference canonical_nan_reference = |
- ExternalReference::address_of_canonical_non_hole_nan(); |
- __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); |
- __ bind(&have_value); |
- } |
- |
+ ExternalReference canonical_nan_reference = |
+ ExternalReference::address_of_canonical_non_hole_nan(); |
Operand double_store_operand = BuildFastArrayOperand( |
instr->elements(), |
instr->key(), |
@@ -4373,7 +4413,68 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { |
FAST_DOUBLE_ELEMENTS, |
FixedDoubleArray::kHeaderSize - kHeapObjectTag, |
instr->additional_index()); |
- __ movdbl(double_store_operand, value); |
+ |
+ if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
+ CpuFeatureScope scope(masm(), SSE2); |
+ XMMRegister value = ToDoubleRegister(instr->value()); |
+ |
+ if (instr->NeedsCanonicalization()) { |
+ Label have_value; |
+ |
+ __ ucomisd(value, value); |
+ __ j(parity_odd, &have_value); // NaN. |
+ |
+ __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); |
+ __ bind(&have_value); |
+ } |
+ |
+ __ movdbl(double_store_operand, value); |
+ } else { |
+ // Can't use SSE2 in the serializer. |
+ if (instr->hydrogen()->IsConstantHoleStore()) { |
+ // This means we should store the (double) hole. No floating point |
+ // registers required. |
+ double nan_double = FixedDoubleArray::hole_nan_as_double(); |
+ uint64_t int_val = BitCast<uint64_t, double>(nan_double); |
+ int32_t lower = static_cast<int32_t>(int_val); |
+ int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
+ |
+ __ mov(double_store_operand, Immediate(lower)); |
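+ // The second operand addresses the upper half of the same double |
+ // (kPointerSize bytes further on). |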
+ Operand double_store_operand2 = BuildFastArrayOperand( |
+ instr->elements(), |
+ instr->key(), |
+ instr->hydrogen()->key()->representation(), |
+ FAST_DOUBLE_ELEMENTS, |
+ FixedDoubleArray::kHeaderSize - kHeapObjectTag + kPointerSize, |
+ instr->additional_index()); |
+ __ mov(double_store_operand2, Immediate(upper)); |
+ } else { |
+ Label no_special_nan_handling; |
+ ASSERT(x87_stack_depth_ > 0); |
+ |
+ if (instr->NeedsCanonicalization()) { |
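+ // A NaN is the only value that compares unordered with itself; |
+ // ordinary numbers need no special handling. |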
+ __ fld(0); |
+ __ fld(0); |
+ __ FCmp(); |
+ |
+ __ j(parity_odd, &no_special_nan_handling); |
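+ // Spill the value and check its upper word against the hole NaN |
+ // pattern; the hole must be stored unchanged, while any other NaN is |
+ // replaced with the canonical NaN. |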
+ __ sub(esp, Immediate(kDoubleSize)); |
+ __ fst_d(MemOperand(esp, 0)); |
+ __ cmp(MemOperand(esp, sizeof(kHoleNanLower32)), |
+ Immediate(kHoleNanUpper32)); |
+ __ add(esp, Immediate(kDoubleSize)); |
+ Label canonicalize; |
+ __ j(not_equal, &canonicalize); |
+ __ jmp(&no_special_nan_handling); |
+ __ bind(&canonicalize); |
+ __ fstp(0); |
+ __ fld_d(Operand::StaticVariable(canonical_nan_reference)); |
+ } |
+ |
+ __ bind(&no_special_nan_handling); |
+ __ fst_d(double_store_operand); |
+ } |
+ } |
} |
@@ -4805,9 +4906,6 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
XMMRegister input_reg = ToDoubleRegister(instr->value()); |
__ ucomisd(input_reg, input_reg); |
} else { |
- if (!IsX87TopOfStack(instr->value())) { |
- __ fld_d(ToOperand(instr->value())); |
- } |
__ fld(0); |
__ fld(0); |
__ FCmp(); |
@@ -4829,6 +4927,9 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
__ j(not_equal, &canonicalize); |
__ add(esp, Immediate(kDoubleSize)); |
__ mov(reg, factory()->the_hole_value()); |
+ if (!use_sse2) { |
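+ // The hole is represented by the_hole_value itself, so the double |
+ // left on the x87 stack is no longer needed. |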
+ __ fstp(0); |
+ } |
__ jmp(&done); |
__ bind(&canonicalize); |
__ add(esp, Immediate(kDoubleSize)); |
@@ -4858,10 +4959,7 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
XMMRegister input_reg = ToDoubleRegister(instr->value()); |
__ movdbl(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); |
} else { |
- if (!IsX87TopOfStack(instr->value())) { |
- __ fld_d(ToOperand(instr->value())); |
- } |
- __ fstp_d(FieldOperand(reg, HeapNumber::kValueOffset)); |
+ __ fst_d(FieldOperand(reg, HeapNumber::kValueOffset)); |
} |
__ bind(&done); |
} |
@@ -4909,6 +5007,79 @@ void LCodeGen::DoSmiUntag(LSmiUntag* instr) { |
} |
+void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, |
+ Register temp_reg, |
+ bool deoptimize_on_undefined, |
+ bool deoptimize_on_minus_zero, |
+ LEnvironment* env, |
+ NumberUntagDMode mode) { |
+ Label load_smi, done; |
+ |
+ if (mode == NUMBER_CANDIDATE_IS_ANY_TAGGED) { |
+ // Smi check. |
+ __ JumpIfSmi(input_reg, &load_smi, Label::kNear); |
+ |
+ // Heap number map check. |
+ __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
+ factory()->heap_number_map()); |
+ if (deoptimize_on_undefined) { |
+ DeoptimizeIf(not_equal, env); |
+ } else { |
+ Label heap_number; |
+ __ j(equal, &heap_number, Label::kNear); |
+ |
+ __ cmp(input_reg, factory()->undefined_value()); |
+ DeoptimizeIf(not_equal, env); |
+ |
+ // Convert undefined to NaN. |
+ ExternalReference nan = |
+ ExternalReference::address_of_canonical_non_hole_nan(); |
+ __ fld_d(Operand::StaticVariable(nan)); |
+ __ jmp(&done, Label::kNear); |
+ __ bind(&heap_number); |
+ } |
+ // Heap number to x87 conversion. |
+ __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); |
+ if (deoptimize_on_minus_zero) { |
+ __ fldz(); |
+ __ FCmp(); |
+ __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); |
+ __ j(not_zero, &done, Label::kNear); |
+ |
+ // Use general purpose registers to check if we have -0.0 |
+ __ mov(temp_reg, FieldOperand(input_reg, HeapNumber::kExponentOffset)); |
+ __ test(temp_reg, Immediate(HeapNumber::kSignMask)); |
+ __ j(zero, &done, Label::kNear); |
+ |
+ // Pop FPU stack before deoptimizing. |
+ __ fstp(0); |
+ DeoptimizeIf(not_zero, env); |
+ } |
+ __ jmp(&done, Label::kNear); |
+ } else if (mode == NUMBER_CANDIDATE_IS_SMI_OR_HOLE) { |
+ __ test(input_reg, Immediate(kSmiTagMask)); |
+ DeoptimizeIf(not_equal, env); |
+ } else if (mode == NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE) { |
+ __ test(input_reg, Immediate(kSmiTagMask)); |
+ __ j(zero, &load_smi); |
+ ExternalReference hole_nan_reference = |
+ ExternalReference::address_of_the_hole_nan(); |
+ __ fld_d(Operand::StaticVariable(hole_nan_reference)); |
+ __ jmp(&done, Label::kNear); |
+ } else { |
+ ASSERT(mode == NUMBER_CANDIDATE_IS_SMI); |
+ } |
+ |
+ __ bind(&load_smi); |
+ __ SmiUntag(input_reg); // Untag smi before converting to float. |
+ __ push(input_reg); |
+ __ fild_s(Operand(esp, 0)); |
+ __ pop(input_reg); |
+ __ SmiTag(input_reg); // Retag smi. |
+ __ bind(&done); |
+} |
+ |
+ |
void LCodeGen::EmitNumberUntagD(Register input_reg, |
Register temp_reg, |
XMMRegister result_reg, |
@@ -5021,7 +5192,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
__ fisttp_d(Operand(esp, 0)); |
__ mov(input_reg, Operand(esp, 0)); // Low word of answer is the result. |
__ add(Operand(esp), Immediate(kDoubleSize)); |
- } else { |
+ } else if (CpuFeatures::IsSupported(SSE2)) { |
CpuFeatureScope scope(masm(), SSE2); |
XMMRegister xmm_temp = ToDoubleRegister(instr->temp()); |
__ movdbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); |
@@ -5035,6 +5206,8 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
__ ucomisd(xmm_temp, xmm0); |
DeoptimizeIf(not_equal, instr->environment()); |
DeoptimizeIf(parity_even, instr->environment()); // NaN. |
+ } else { |
+ UNREACHABLE(); |
} |
} else if (CpuFeatures::IsSupported(SSE2)) { |
CpuFeatureScope scope(masm(), SSE2); |
@@ -5079,18 +5252,169 @@ void LCodeGen::DoTaggedToI(LTaggedToI* instr) { |
LOperand* input = instr->value(); |
ASSERT(input->IsRegister()); |
- ASSERT(input->Equals(instr->result())); |
- |
Register input_reg = ToRegister(input); |
+ ASSERT(input_reg.is(ToRegister(instr->result()))); |
DeferredTaggedToI* deferred = new(zone()) DeferredTaggedToI(this, instr); |
- // Smi check. |
__ JumpIfNotSmi(input_reg, deferred->entry()); |
+ __ SmiUntag(input_reg); |
+ __ bind(deferred->exit()); |
+} |
- // Smi to int32 conversion |
- __ SmiUntag(input_reg); // Untag smi. |
+void LCodeGen::DoDeferredTaggedToINoSSE2(LTaggedToINoSSE2* instr) { |
+ Label done, heap_number; |
+ Register result_reg = ToRegister(instr->result()); |
+ Register input_reg = ToRegister(instr->value()); |
+ |
+ // Heap number map check. |
+ __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
+ factory()->heap_number_map()); |
+ __ j(equal, &heap_number, Label::kNear); |
+ // Check for undefined. Undefined is converted to zero for truncating |
+ // conversions. |
+ __ cmp(input_reg, factory()->undefined_value()); |
+ __ RecordComment("Deferred TaggedToI: cannot truncate"); |
+ DeoptimizeIf(not_equal, instr->environment()); |
+ __ xor_(result_reg, result_reg); |
+ __ jmp(&done, Label::kFar); |
+ __ bind(&heap_number); |
+ |
+ // Surprisingly, all of this crazy bit manipulation is considerably |
+ // faster than using the built-in x86 CPU conversion functions (about 6x). |
+ Label right_exponent, adjust_bias, zero_result; |
+ Register scratch = ToRegister(instr->scratch()); |
+ Register scratch2 = ToRegister(instr->scratch2()); |
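+ // The exponent word of a heap number holds sign (1 bit), exponent |
+ // (11 bits) and the top 20 mantissa bits; the mantissa word holds the |
+ // remaining 32 mantissa bits. |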
+ // Get exponent word. |
+ __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); |
+ // Get exponent alone in scratch2. |
+ __ mov(scratch2, scratch); |
+ __ and_(scratch2, HeapNumber::kExponentMask); |
+ __ shr(scratch2, HeapNumber::kExponentShift); |
+ if (instr->truncating()) { |
+ __ j(zero, &zero_result); |
+ } else { |
+ __ j(not_zero, &adjust_bias); |
+ __ test(scratch, Immediate(HeapNumber::kMantissaMask)); |
+ DeoptimizeIf(not_zero, instr->environment()); |
+ __ cmp(FieldOperand(input_reg, HeapNumber::kMantissaOffset), Immediate(0)); |
+ DeoptimizeIf(not_equal, instr->environment()); |
+ __ bind(&adjust_bias); |
+ } |
+ __ sub(scratch2, Immediate(HeapNumber::kExponentBias)); |
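+ // scratch2 now holds the unbiased exponent, e.g. 3 for the value 8.0. |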
+ if (!instr->truncating()) { |
+ DeoptimizeIf(negative, instr->environment()); |
+ } else { |
+ __ j(negative, &zero_result); |
+ } |
+ |
+ // Get the second half of the double. For some exponents we don't |
+ // actually need this because the bits get shifted out again, but |
+ // it's probably slower to test than just to do it. |
+ Register scratch3 = ToRegister(instr->scratch3()); |
+ __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
+ __ xor_(result_reg, result_reg); |
+ |
+ const uint32_t non_int32_exponent = 31; |
+ __ cmp(scratch2, Immediate(non_int32_exponent)); |
+ // If we have a match of the int32 exponent then skip some logic. |
+ __ j(equal, &right_exponent, Label::kNear); |
+ // If the number doesn't fit in an int32, deopt. |
+ DeoptimizeIf(greater, instr->environment()); |
+ |
+ // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent |
+ // < 31. |
+ __ mov(result_reg, Immediate(31)); |
+ __ sub(result_reg, scratch2); |
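+ // result_reg now holds 31 - exponent, the right shift that scales the |
+ // aligned mantissa down to the integer value. |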
+ |
+ __ bind(&right_exponent); |
+ |
+ // Save off exponent for negative check later. |
+ __ mov(scratch2, scratch); |
+ |
+ // Here result_reg is the shift, scratch is the exponent word. |
+ // Get the top bits of the mantissa. |
+ __ and_(scratch, HeapNumber::kMantissaMask); |
+ // Put back the implicit 1. |
+ __ or_(scratch, 1 << HeapNumber::kExponentShift); |
+ // Shift up the mantissa bits to take up the space the exponent used to |
+ // take. We have kExponentShift + 1 significant bits in the low end of the |
+ // word. Shift them to the top bits. |
+ const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1; |
+ __ shl(scratch, shift_distance); |
+ if (!instr->truncating()) { |
+ // If not truncating, a non-zero value in the bottom 22 bits means a |
+ // non-integral value --> trigger a deopt. |
+ __ test(scratch3, Immediate((1 << (32 - shift_distance)) - 1)); |
+ DeoptimizeIf(not_equal, instr->environment()); |
+ } |
+ // Shift down 22 bits to get the most significant 10 bits of the low |
+ // mantissa word. |
+ __ shr(scratch3, 32 - shift_distance); |
+ __ or_(scratch3, scratch); |
+ if (!instr->truncating()) { |
+ // If not truncating, a non-zero value in the bits that will be shifted |
+ // away when adjusting the exponent means rounding --> deopt. |
+ __ mov(scratch, 0x1); |
+ ASSERT(result_reg.is(ecx)); |
+ __ shl_cl(scratch); |
+ __ dec(scratch); |
+ __ test(scratch3, scratch); |
+ DeoptimizeIf(not_equal, instr->environment()); |
+ } |
+ // Move down according to the exponent. |
+ ASSERT(result_reg.is(ecx)); |
+ __ shr_cl(scratch3); |
+ // Now the unsigned 32-bit answer is in scratch3. We need to move it to |
+ // result_reg and we may need to fix the sign. |
+ Label negative_result; |
+ __ xor_(result_reg, result_reg); |
+ __ cmp(scratch2, result_reg); |
+ __ j(less, &negative_result, Label::kNear); |
+ __ cmp(scratch3, result_reg); |
+ __ mov(result_reg, scratch3); |
+ // If the result is > MAX_INT, result doesn't fit in signed 32-bit --> deopt. |
+ DeoptimizeIf(less, instr->environment()); |
+ __ jmp(&done, Label::kNear); |
+ __ bind(&zero_result); |
+ __ xor_(result_reg, result_reg); |
+ __ jmp(&done, Label::kNear); |
+ __ bind(&negative_result); |
+ __ sub(result_reg, scratch3); |
+ if (!instr->truncating()) { |
+ // -0.0 triggers a deopt. |
+ DeoptimizeIf(zero, instr->environment()); |
+ } |
+ // If the negative subtraction overflows into a positive number, there was an |
+ // overflow --> deopt. |
+ DeoptimizeIf(positive, instr->environment()); |
+ __ bind(&done); |
+} |
+ |
+ |
+void LCodeGen::DoTaggedToINoSSE2(LTaggedToINoSSE2* instr) { |
+ class DeferredTaggedToINoSSE2: public LDeferredCode { |
+ public: |
+ DeferredTaggedToINoSSE2(LCodeGen* codegen, LTaggedToINoSSE2* instr) |
+ : LDeferredCode(codegen), instr_(instr) { } |
+ virtual void Generate() { codegen()->DoDeferredTaggedToINoSSE2(instr_); } |
+ virtual LInstruction* instr() { return instr_; } |
+ private: |
+ LTaggedToINoSSE2* instr_; |
+ }; |
+ |
+ LOperand* input = instr->value(); |
+ ASSERT(input->IsRegister()); |
+ Register input_reg = ToRegister(input); |
+ ASSERT(input_reg.is(ToRegister(instr->result()))); |
+ |
+ DeferredTaggedToINoSSE2* deferred = |
+ new(zone()) DeferredTaggedToINoSSE2(this, instr); |
+ |
+ // Smi check. |
+ __ JumpIfNotSmi(input_reg, deferred->entry()); |
+ __ SmiUntag(input_reg); // Untag smi. |
__ bind(deferred->exit()); |
} |
@@ -5103,32 +5427,31 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { |
LOperand* result = instr->result(); |
ASSERT(result->IsDoubleRegister()); |
- if (CpuFeatures::IsSupported(SSE2)) { |
- CpuFeatureScope scope(masm(), SSE2); |
- Register input_reg = ToRegister(input); |
- XMMRegister result_reg = ToDoubleRegister(result); |
- |
- bool deoptimize_on_minus_zero = |
- instr->hydrogen()->deoptimize_on_minus_zero(); |
- Register temp_reg = deoptimize_on_minus_zero ? ToRegister(temp) : no_reg; |
- |
- NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; |
- HValue* value = instr->hydrogen()->value(); |
- if (value->type().IsSmi()) { |
- if (value->IsLoadKeyed()) { |
- HLoadKeyed* load = HLoadKeyed::cast(value); |
- if (load->UsesMustHandleHole()) { |
- if (load->hole_mode() == ALLOW_RETURN_HOLE) { |
- mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; |
- } else { |
- mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; |
- } |
+ Register input_reg = ToRegister(input); |
+ bool deoptimize_on_minus_zero = |
+ instr->hydrogen()->deoptimize_on_minus_zero(); |
+ Register temp_reg = deoptimize_on_minus_zero ? ToRegister(temp) : no_reg; |
+ |
+ NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; |
+ HValue* value = instr->hydrogen()->value(); |
+ if (value->type().IsSmi()) { |
+ if (value->IsLoadKeyed()) { |
+ HLoadKeyed* load = HLoadKeyed::cast(value); |
+ if (load->UsesMustHandleHole()) { |
+ if (load->hole_mode() == ALLOW_RETURN_HOLE) { |
+ mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; |
} else { |
- mode = NUMBER_CANDIDATE_IS_SMI; |
+ mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; |
} |
+ } else { |
+ mode = NUMBER_CANDIDATE_IS_SMI; |
} |
} |
+ } |
+ if (CpuFeatures::IsSupported(SSE2)) { |
+ CpuFeatureScope scope(masm(), SSE2); |
+ XMMRegister result_reg = ToDoubleRegister(result); |
EmitNumberUntagD(input_reg, |
temp_reg, |
result_reg, |
@@ -5137,7 +5460,13 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { |
instr->environment(), |
mode); |
} else { |
- UNIMPLEMENTED(); |
+ EmitNumberUntagDNoSSE2(input_reg, |
+ temp_reg, |
+ instr->hydrogen()->deoptimize_on_undefined(), |
+ deoptimize_on_minus_zero, |
+ instr->environment(), |
+ mode); |
+ CurrentInstructionReturnsX87Result(); |
} |
} |
@@ -5409,7 +5738,128 @@ void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { |
__ bind(&is_smi); |
__ SmiUntag(input_reg); |
__ ClampUint8(input_reg); |
+ __ bind(&done); |
+} |
+ |
+ |
+void LCodeGen::DoClampTToUint8NoSSE2(LClampTToUint8NoSSE2* instr) { |
+ Register input_reg = ToRegister(instr->unclamped()); |
+ Register result_reg = ToRegister(instr->result()); |
+ Register scratch = ToRegister(instr->scratch()); |
+ Register scratch2 = ToRegister(instr->scratch2()); |
+ Register scratch3 = ToRegister(instr->scratch3()); |
+ Label is_smi, done, heap_number, valid_exponent, |
+ largest_value, zero_result, maybe_nan_or_infinity; |
+ |
+ __ JumpIfSmi(input_reg, &is_smi); |
+ |
+ // Check for heap number |
+ __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
+ factory()->heap_number_map()); |
+ __ j(equal, &heap_number, Label::kFar); |
+ |
+ // Check for undefined. Undefined is converted to zero for clamping |
+ // conversions. |
+ __ cmp(input_reg, factory()->undefined_value()); |
+ DeoptimizeIf(not_equal, instr->environment()); |
+ __ jmp(&zero_result); |
+ |
+ // Heap number |
+ __ bind(&heap_number); |
+ |
+ // Surprisingly, all of the hand-crafted bit-manipulations below are much |
+ // faster than the x86 FPU built-in instruction, especially since "banker's |
+ // rounding" would be additionally very expensive |
+ |
+ // Get exponent word. |
+ __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); |
+ __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
+ |
+ // Test for negative values --> clamp to zero |
+ __ test(scratch, scratch); |
+ __ j(negative, &zero_result); |
+ |
+ // Get exponent alone in scratch2. |
+ __ mov(scratch2, scratch); |
+ __ and_(scratch2, HeapNumber::kExponentMask); |
+ __ shr(scratch2, HeapNumber::kExponentShift); |
+ __ j(zero, &zero_result); |
+ __ sub(scratch2, Immediate(HeapNumber::kExponentBias - 1)); |
+ __ j(negative, &zero_result); |
+ |
+ const uint32_t non_int8_exponent = 7; |
+ __ cmp(scratch2, Immediate(non_int8_exponent + 1)); |
+ // If the exponent is too big, check for special values. |
+ __ j(greater, &maybe_nan_or_infinity, Label::kNear); |
+ |
+ __ bind(&valid_exponent); |
+ // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent |
+ // < 7. The shift bias is the number of bits to shift the mantissa such |
+ // that, with an exponent of 7, the top-most one is in bit 30, allowing |
+ // detection of the rounding overflow of 255.5 to 256 (bit 31 goes from |
+ // 0 to 1). |
+ int shift_bias = (30 - HeapNumber::kExponentShift) - 7 - 1; |
+ __ lea(result_reg, MemOperand(scratch2, shift_bias)); |
+ // Here result_reg (ecx) is the shift, scratch is the exponent word. Get the |
+ // top bits of the mantissa. |
+ __ and_(scratch, HeapNumber::kMantissaMask); |
+ // Put back the implicit 1 of the mantissa |
+ __ or_(scratch, 1 << HeapNumber::kExponentShift); |
+ // Shift up to round |
+ __ shl_cl(scratch); |
+ // Use "banker's rounding" to spec: If fractional part of number is 0.5, then |
+ // use the bit in the "ones" place and add it to the "halves" place, which has |
+ // the effect of rounding to even. |
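+ // For example, 2.5 rounds down to 2 while 3.5 rounds up to 4. |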
+ __ mov(scratch2, scratch); |
+ const uint32_t one_half_bit_shift = 30 - sizeof(uint8_t) * 8; |
+ const uint32_t one_bit_shift = one_half_bit_shift + 1; |
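+ // With the integer part ending at bit 30, bit 23 is the "ones" place |
+ // and bit 22 is the "halves" place. |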
+ __ and_(scratch2, Immediate((1 << one_bit_shift) - 1)); |
+ __ cmp(scratch2, Immediate(1 << one_half_bit_shift)); |
+ Label no_round; |
+ __ j(less, &no_round); |
+ Label round_up; |
+ __ mov(scratch2, Immediate(1 << one_half_bit_shift)); |
+ __ j(greater, &round_up); |
+ __ test(scratch3, scratch3); |
+ __ j(not_zero, &round_up); |
+ __ mov(scratch2, scratch); |
+ __ and_(scratch2, Immediate(1 << one_bit_shift)); |
+ __ shr(scratch2, 1); |
+ __ bind(&round_up); |
+ __ add(scratch, scratch2); |
+ __ j(overflow, &largest_value); |
+ __ bind(&no_round); |
+ __ shr(scratch, 23); |
+ __ mov(result_reg, scratch); |
+ __ jmp(&done, Label::kNear); |
+ |
+ __ bind(&maybe_nan_or_infinity); |
+ // Check for NaN/Infinity, all other values map to 255 |
+ __ cmp(scratch2, Immediate(HeapNumber::kInfinityOrNanExponent + 1)); |
+ __ j(not_equal, &largest_value, Label::kNear); |
+ |
+ // Check for NaN, which differs from Infinity in that at least one mantissa |
+ // bit is set. |
+ __ and_(scratch, HeapNumber::kMantissaMask); |
+ __ or_(scratch, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
+ __ j(not_zero, &zero_result); // M!=0 --> NaN |
+ // Infinity -> Fall through to map to 255. |
+ __ bind(&largest_value); |
+ __ mov(result_reg, Immediate(255)); |
+ __ jmp(&done, Label::kNear); |
+ |
+ __ bind(&zero_result); |
+ __ xor_(result_reg, result_reg); |
+ __ jmp(&done); |
+ |
+ // smi |
+ __ bind(&is_smi); |
+ if (!input_reg.is(result_reg)) { |
+ __ mov(result_reg, input_reg); |
+ } |
+ __ SmiUntag(result_reg); |
+ __ ClampUint8(result_reg); |
__ bind(&done); |
} |