Chromium Code Reviews
| Index: src/ia32/lithium-codegen-ia32.cc |
| diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc |
| index c0c1079606781b9401dc2292173d0e6cfb100141..57d402f10761a22a59afebc3639c9d626598beec 100644 |
| --- a/src/ia32/lithium-codegen-ia32.cc |
| +++ b/src/ia32/lithium-codegen-ia32.cc |
| @@ -102,7 +102,8 @@ void LCodeGen::FinishCode(Handle<Code> code) { |
| ASSERT(is_done()); |
| code->set_stack_slots(GetStackSlotCount()); |
| code->set_safepoint_table_offset(safepoints_.GetCodeOffset()); |
| - if (FLAG_weak_embedded_maps_in_optimized_code) { |
| + if (FLAG_weak_embedded_maps_in_optimized_code && |
| + code->kind() == Code::OPTIMIZED_FUNCTION) { |
|
mvstanton
2013/04/08 16:10:14
I don't believe this is needed for this CL.
|
| RegisterDependentCodeForEmbeddedMaps(code); |
| } |
| PopulateDeoptimizationData(code); |
| @@ -366,7 +367,37 @@ bool LCodeGen::GenerateBody() { |
| Comment(";;; @%d: %s.", current_instruction_, instr->Mnemonic()); |
| } |
| } |
| + |
| instr->CompileToNative(this); |
| + |
| + if (!CpuFeatures::IsSupported(SSE2)) { |
| + if (instr->ClobbersDoubleRegisters()) { |
|
danno
2013/04/08 12:57:27
I am not sure that is safe to do _after_ the CompileToNative call.
mvstanton
2013/04/08 16:10:14
Thanks for your extensive help with this brain teaser.
|
| + if (x87_stack_depth_ > 0) { |
| + PopX87(); |
| + } |
| + } |
| + |
| + ASSERT(!instr->HasDoubleRegisterResult() || x87_stack_depth_ == 1); |
| + |
| + if (FLAG_debug_code && FLAG_enable_slow_asserts) { |
| + // Make sure the floating point stack is either empty or has one item, |
| + // the result value of the instruction. |
| + int tos = (x87_stack_depth_ > 0) ? 7 : 0; |
| + const int kTopMask = 0x3800; |
| + __ push(eax); |
| + __ fwait(); |
| + __ fnstsw_ax(); |
| + __ and_(eax, kTopMask); |
| + __ shr(eax, 11); |
| + __ cmp(eax, Immediate(tos)); |
| + Label all_ok; |
| + __ j(equal, &all_ok); |
| + __ Check(equal, "FPU Top is not zero after instruction"); |
| + __ bind(&all_ok); |
| + __ fnclex(); |
| + __ pop(eax); |
| + } |
| + } |
| } |
| } |
| EnsureSpaceForLazyDeopt(); |
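For reference, the slow-assert block above reads the x87 status word with fnstsw and extracts the TOP-of-stack field, which lives in bits 11..13 (hence the 0x3800 mask and the shift by 11); TOP is 0 for an empty stack and wraps to 7 after a single push. A minimal host-side C++ sketch of the same extraction (names are illustrative, not V8 API):

#include <cstdint>
#include <cstdio>

// Extract the TOP field from an x87 status word, mirroring the
// fnstsw/and/shr sequence in the generated debug check.
static int X87Top(uint16_t status_word) {
  const uint16_t kTopMask = 0x3800;  // bits 11..13
  return (status_word & kTopMask) >> 11;
}

int main() {
  // TOP is 0 for an empty stack; one push decrements it to 7, which is
  // why the check expects 7 when x87_stack_depth_ > 0.
  std::printf("%d\n", X87Top(0x0000));  // 0
  std::printf("%d\n", X87Top(0x3800));  // 7
  return 0;
}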
| @@ -521,6 +552,40 @@ bool LCodeGen::IsX87TopOfStack(LOperand* op) const { |
| } |
| +void LCodeGen::ReadX87Operand(Operand dst) { |
| + ASSERT(x87_stack_depth_ == 1); |
| + __ fst_d(dst); |
| +} |
| + |
| + |
| +void LCodeGen::PushX87Operand(Operand src) { |
|
danno
2013/04/08 12:57:27
Maybe make this PushX87DoubleOperand?
mvstanton
2013/04/08 16:10:14
Done.
|
| + ASSERT(x87_stack_depth_ == 0); |
| + x87_stack_depth_++; |
| + __ fld_d(src); |
| +} |
| + |
| + |
| +void LCodeGen::PushX87FloatOperand(Operand src) { |
| + ASSERT(x87_stack_depth_ == 0); |
| + x87_stack_depth_++; |
| + __ fld_s(src); |
| +} |
| + |
| + |
| +void LCodeGen::PopX87() { |
| + ASSERT(x87_stack_depth_ == 1); |
| + x87_stack_depth_--; |
| + __ fstp(0); |
| +} |
| + |
| + |
| +void LCodeGen::MarkReturnX87Result() { |
|
danno
2013/04/08 12:57:27
Maybe CurrentInstructionReturnsX87Result?
mvstanton
2013/04/08 16:10:14
Done.
|
| + ASSERT(x87_stack_depth_ <= 1); |
| + if (x87_stack_depth_ == 0) { |
| + x87_stack_depth_ = 1; |
| + } |
| +} |
| + |
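The helpers above keep x87_stack_depth_ in sync with the hardware stack under the invariant that at most one value is ever live between Lithium instructions. A minimal sketch of that bookkeeping (class and method names are illustrative, not V8 API):

#include <cassert>

// Models the invariant the helpers enforce: the x87 stack holds at most
// one value between instructions.
class X87StackModel {
 public:
  void Push() { assert(depth_ == 0); ++depth_; }          // PushX87*Operand
  void Pop() { assert(depth_ == 1); --depth_; }           // PopX87
  void MarkResult() { assert(depth_ <= 1); depth_ = 1; }  // MarkReturnX87Result
  int depth() const { return depth_; }

 private:
  int depth_ = 0;
};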
| Register LCodeGen::ToRegister(LOperand* op) const { |
| ASSERT(op->IsRegister()); |
| return ToRegister(op->index()); |
| @@ -846,6 +911,8 @@ void LCodeGen::RegisterEnvironmentForDeoptimization( |
| void LCodeGen::DeoptimizeIf(Condition cc, LEnvironment* environment) { |
| RegisterEnvironmentForDeoptimization(environment, Safepoint::kNoLazyDeopt); |
| ASSERT(environment->HasBeenRegistered()); |
| + // It's an error to deoptimize with the x87 fp stack in use. |
| + ASSERT(x87_stack_depth_ == 0); |
| int id = environment->deoptimization_index(); |
| ASSERT(info()->IsOptimizing() || info()->IsStub()); |
| Deoptimizer::BailoutType bailout_type = info()->IsStub() |
| @@ -1689,40 +1756,46 @@ void LCodeGen::DoConstantI(LConstantI* instr) { |
| void LCodeGen::DoConstantD(LConstantD* instr) { |
| - ASSERT(instr->result()->IsDoubleRegister()); |
| - XMMRegister res = ToDoubleRegister(instr->result()); |
| double v = instr->value(); |
| - // Use xor to produce +0.0 in a fast and compact way, but avoid to |
| - // do so if the constant is -0.0. |
| - if (BitCast<uint64_t, double>(v) == 0) { |
| - __ xorps(res, res); |
| + uint64_t int_val = BitCast<uint64_t, double>(v); |
| + int32_t lower = static_cast<int32_t>(int_val); |
| + int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
| + |
| + if (!CpuFeatures::IsSafeForSnapshot(SSE2)) { |
| + __ push(Immediate(lower)); |
| + __ push(Immediate(upper)); |
| + PushX87Operand(Operand(esp, 0)); |
| + __ add(Operand(esp), Immediate(kDoubleSize)); |
| + MarkReturnX87Result(); |
| } else { |
| - Register temp = ToRegister(instr->temp()); |
| - uint64_t int_val = BitCast<uint64_t, double>(v); |
| - int32_t lower = static_cast<int32_t>(int_val); |
| - int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
| - if (CpuFeatures::IsSupported(SSE4_1)) { |
| - CpuFeatureScope scope1(masm(), SSE2); |
| - CpuFeatureScope scope2(masm(), SSE4_1); |
| - if (lower != 0) { |
| - __ Set(temp, Immediate(lower)); |
| - __ movd(res, Operand(temp)); |
| - __ Set(temp, Immediate(upper)); |
| - __ pinsrd(res, Operand(temp), 1); |
| + CpuFeatureScope scope1(masm(), SSE2); |
| + ASSERT(instr->result()->IsDoubleRegister()); |
| + XMMRegister res = ToDoubleRegister(instr->result()); |
| + if (int_val == 0) { |
| + __ xorps(res, res); |
| + } else { |
| + Register temp = ToRegister(instr->temp()); |
| + if (CpuFeatures::IsSupported(SSE4_1)) { |
| + CpuFeatureScope scope2(masm(), SSE4_1); |
| + if (lower != 0) { |
| + __ Set(temp, Immediate(lower)); |
| + __ movd(res, Operand(temp)); |
| + __ Set(temp, Immediate(upper)); |
| + __ pinsrd(res, Operand(temp), 1); |
| + } else { |
| + __ xorps(res, res); |
| + __ Set(temp, Immediate(upper)); |
| + __ pinsrd(res, Operand(temp), 1); |
| + } |
| } else { |
| - __ xorps(res, res); |
| __ Set(temp, Immediate(upper)); |
| - __ pinsrd(res, Operand(temp), 1); |
| - } |
| - } else { |
| - CpuFeatureScope scope(masm(), SSE2); |
| - __ Set(temp, Immediate(upper)); |
| - __ movd(res, Operand(temp)); |
| - __ psllq(res, 32); |
| - if (lower != 0) { |
| - __ Set(temp, Immediate(lower)); |
| - __ movd(xmm0, Operand(temp)); |
| - __ por(res, xmm0); |
| + __ movd(res, Operand(temp)); |
| + __ psllq(res, 32); |
| + if (lower != 0) { |
| + __ Set(temp, Immediate(lower)); |
| + __ movd(xmm0, Operand(temp)); |
| + __ por(res, xmm0); |
| + } |
| } |
| } |
| } |
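The no-SSE2 path above materializes the double constant from its two 32-bit halves through memory. A standalone sketch of the split and reassembly, with std::memcpy standing in for V8's BitCast:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double v = 1.5;
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof bits);  // BitCast<uint64_t, double>(v)
  uint32_t lower = static_cast<uint32_t>(bits);
  uint32_t upper = static_cast<uint32_t>(bits >> 32);

  // Reassemble the halves and confirm the round trip is exact.
  uint64_t rebuilt = (static_cast<uint64_t>(upper) << 32) | lower;
  double out;
  std::memcpy(&out, &rebuilt, sizeof out);
  assert(out == v);
  return 0;
}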
| @@ -2714,6 +2787,7 @@ void LCodeGen::EmitReturn(LReturn* instr, bool dynamic_frame_alignment) { |
| __ Ret((parameter_count + extra_value_count) * kPointerSize, ecx); |
| } else { |
| Register reg = ToRegister(instr->parameter_count()); |
| + __ SmiUntag(reg); // it is a smi |
|
danno
2013/04/08 12:57:27
Is this an unrelated bug?
mvstanton
2013/04/08 16:10:14
Yep, removed.
|
| Register return_addr_reg = reg.is(ecx) ? ebx : ecx; |
| if (dynamic_frame_alignment && FLAG_debug_code) { |
| ASSERT(extra_value_count == 2); |
| @@ -3126,13 +3200,21 @@ void LCodeGen::DoLoadExternalArrayPointer( |
| void LCodeGen::DoAccessArgumentsAt(LAccessArgumentsAt* instr) { |
| Register arguments = ToRegister(instr->arguments()); |
| - Register length = ToRegister(instr->length()); |
| - Operand index = ToOperand(instr->index()); |
| Register result = ToRegister(instr->result()); |
| - // There are two words between the frame pointer and the last argument. |
| - // Subtracting from length accounts for one of them; add one more. |
| - __ sub(length, index); |
| - __ mov(result, Operand(arguments, length, times_4, kPointerSize)); |
| + if (instr->length()->IsConstantOperand() && |
| + instr->index()->IsConstantOperand()) { |
| + int const_index = ToInteger32(LConstantOperand::cast(instr->index())); |
| + int const_length = ToInteger32(LConstantOperand::cast(instr->length())); |
| + int index = (const_length - const_index) + 1; |
| + __ mov(result, Operand(arguments, index * kPointerSize)); |
| + } else { |
| + Register length = ToRegister(instr->length()); |
| + Operand index = ToOperand(instr->index()); |
| + // There are two words between the frame pointer and the last argument. |
| + // Subtracting from length accounts for one of them; add one more. |
| + __ sub(length, index); |
| + __ mov(result, Operand(arguments, length, times_4, kPointerSize)); |
| + } |
|
danno
2013/04/08 12:57:27
Unrelated changes?
mvstanton
2013/04/08 16:10:14
Yep, removed.
|
| } |
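When both length and index are constants, the operand above collapses to a fixed byte offset from the arguments pointer. A scalar sketch of that computation, assuming ia32's 4-byte kPointerSize (the +1 accounts for one of the two words between the frame pointer and the last argument, as the comment notes):

// Computes the byte offset used by the constant-folded load above.
int ArgumentSlotOffset(int const_length, int const_index) {
  const int kPointerSize = 4;  // ia32
  int index = (const_length - const_index) + 1;
  return index * kPointerSize;
}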
| @@ -3158,16 +3240,16 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { |
| __ movss(result, operand); |
| __ cvtss2sd(result, result); |
| } else { |
| - __ fld_s(operand); |
| - HandleX87FPReturnValue(instr); |
| + PushX87FloatOperand(operand); |
| + MarkReturnX87Result(); |
| } |
| } else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { |
| if (CpuFeatures::IsSupported(SSE2)) { |
| CpuFeatureScope scope(masm(), SSE2); |
| __ movdbl(ToDoubleRegister(instr->result()), operand); |
| } else { |
| - __ fld_d(operand); |
| - HandleX87FPReturnValue(instr); |
| + PushX87Operand(operand); |
| + MarkReturnX87Result(); |
| } |
| } else { |
| Register result(ToRegister(instr->result())); |
| @@ -3212,29 +3294,6 @@ void LCodeGen::DoLoadKeyedExternalArray(LLoadKeyed* instr) { |
| } |
| -void LCodeGen::HandleX87FPReturnValue(LInstruction* instr) { |
| - if (IsX87TopOfStack(instr->result())) { |
| - // Return value is already on stack. If the value has no uses, then |
| - // pop it off the FP stack. Otherwise, make sure that there are enough |
| - // copies of the value on the stack to feed all of the usages, e.g. |
| - // when the following instruction uses the return value in multiple |
| - // inputs. |
| - int count = instr->hydrogen_value()->UseCount(); |
| - if (count == 0) { |
| - __ fstp(0); |
| - } else { |
| - count--; |
| - ASSERT(count <= 7); |
| - while (count-- > 0) { |
| - __ fld(0); |
| - } |
| - } |
| - } else { |
| - __ fstp_d(ToOperand(instr->result())); |
| - } |
| -} |
| - |
| - |
| void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { |
| if (instr->hydrogen()->RequiresHoleCheck()) { |
| int offset = FixedDoubleArray::kHeaderSize - kHeapObjectTag + |
| @@ -3261,8 +3320,8 @@ void LCodeGen::DoLoadKeyedFixedDoubleArray(LLoadKeyed* instr) { |
| XMMRegister result = ToDoubleRegister(instr->result()); |
| __ movdbl(result, double_load_operand); |
| } else { |
| - __ fld_d(double_load_operand); |
| - HandleX87FPReturnValue(instr); |
| + PushX87Operand(double_load_operand); |
| + MarkReturnX87Result(); |
| } |
| } |
| @@ -4204,7 +4263,6 @@ void LCodeGen::DoInnerAllocatedObject(LInnerAllocatedObject* instr) { |
| void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { |
| Register object = ToRegister(instr->object()); |
| - Register value = ToRegister(instr->value()); |
| int offset = instr->offset(); |
| if (!instr->transition().is_null()) { |
| @@ -4231,8 +4289,21 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { |
| SmiCheck check_needed = |
| type.IsHeapObject() ? OMIT_SMI_CHECK : INLINE_SMI_CHECK; |
| if (instr->is_in_object()) { |
| - __ mov(FieldOperand(object, offset), value); |
| + if (instr->value()->IsConstantOperand()) { |
|
danno
2013/04/08 12:57:27
Since this has now gotten bigger, can you merge th
mvstanton
2013/04/08 16:10:14
All unrelated, removed!
|
| + LConstantOperand* operand_value = LConstantOperand::cast(instr->value()); |
| + if (IsInteger32(operand_value)) { |
| + int const_value = ToInteger32(operand_value); |
| + __ mov(FieldOperand(object, offset), Immediate(const_value)); |
| + } else { |
| + Handle<Object> handle_value = ToHandle(operand_value); |
| + __ mov(FieldOperand(object, offset), handle_value); |
| + } |
| + } else { |
| + __ mov(FieldOperand(object, offset), ToRegister(instr->value())); |
| + } |
| + |
| if (instr->hydrogen()->NeedsWriteBarrier()) { |
| + Register value = ToRegister(instr->value()); |
| Register temp = ToRegister(instr->temp()); |
| // Update the write barrier for the object for in-object properties. |
| __ RecordWriteField(object, |
| @@ -4246,8 +4317,22 @@ void LCodeGen::DoStoreNamedField(LStoreNamedField* instr) { |
| } else { |
| Register temp = ToRegister(instr->temp()); |
| __ mov(temp, FieldOperand(object, JSObject::kPropertiesOffset)); |
| - __ mov(FieldOperand(temp, offset), value); |
| + |
| + if (instr->value()->IsConstantOperand()) { |
| + LConstantOperand* operand_value = LConstantOperand::cast(instr->value()); |
| + if (IsInteger32(operand_value)) { |
| + int const_value = ToInteger32(operand_value); |
| + __ mov(FieldOperand(temp, offset), Immediate(const_value)); |
| + } else { |
| + Handle<Object> handle_value = ToHandle(operand_value); |
| + __ mov(FieldOperand(temp, offset), handle_value); |
| + } |
| + } else { |
| + __ mov(FieldOperand(temp, offset), ToRegister(instr->value())); |
| + } |
| + |
| if (instr->hydrogen()->NeedsWriteBarrier()) { |
| + Register value = ToRegister(instr->value()); |
| // Update the write barrier for the properties array. |
| // object is used as a scratch register. |
| __ RecordWriteField(temp, |
| @@ -4311,12 +4396,21 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { |
| 0, |
| instr->additional_index())); |
| if (elements_kind == EXTERNAL_FLOAT_ELEMENTS) { |
| - CpuFeatureScope scope(masm(), SSE2); |
| - __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); |
| - __ movss(operand, xmm0); |
| + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
| + CpuFeatureScope scope(masm(), SSE2); |
| + __ cvtsd2ss(xmm0, ToDoubleRegister(instr->value())); |
| + __ movss(operand, xmm0); |
| + } else { |
| + __ fld(0); |
| + __ fstp_s(operand); |
| + } |
| } else if (elements_kind == EXTERNAL_DOUBLE_ELEMENTS) { |
| - CpuFeatureScope scope(masm(), SSE2); |
| - __ movdbl(operand, ToDoubleRegister(instr->value())); |
| + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
| + CpuFeatureScope scope(masm(), SSE2); |
| + __ movdbl(operand, ToDoubleRegister(instr->value())); |
| + } else { |
| + __ fst_d(operand); |
| + } |
| } else { |
| Register value = ToRegister(instr->value()); |
| switch (elements_kind) { |
| @@ -4351,21 +4445,8 @@ void LCodeGen::DoStoreKeyedExternalArray(LStoreKeyed* instr) { |
| void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { |
| - CpuFeatureScope scope(masm(), SSE2); |
| - XMMRegister value = ToDoubleRegister(instr->value()); |
| - |
| - if (instr->NeedsCanonicalization()) { |
| - Label have_value; |
| - |
| - __ ucomisd(value, value); |
| - __ j(parity_odd, &have_value); // NaN. |
| - |
| - ExternalReference canonical_nan_reference = |
| - ExternalReference::address_of_canonical_non_hole_nan(); |
| - __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); |
| - __ bind(&have_value); |
| - } |
| - |
| + ExternalReference canonical_nan_reference = |
| + ExternalReference::address_of_canonical_non_hole_nan(); |
| Operand double_store_operand = BuildFastArrayOperand( |
| instr->elements(), |
| instr->key(), |
| @@ -4373,12 +4454,72 @@ void LCodeGen::DoStoreKeyedFixedDoubleArray(LStoreKeyed* instr) { |
| FAST_DOUBLE_ELEMENTS, |
| FixedDoubleArray::kHeaderSize - kHeapObjectTag, |
| instr->additional_index()); |
| - __ movdbl(double_store_operand, value); |
| + |
| + if (CpuFeatures::IsSafeForSnapshot(SSE2)) { |
| + CpuFeatureScope scope(masm(), SSE2); |
| + XMMRegister value = ToDoubleRegister(instr->value()); |
| + |
| + if (instr->NeedsCanonicalization()) { |
| + Label have_value; |
| + |
| + __ ucomisd(value, value); |
| + __ j(parity_odd, &have_value); // NaN. |
| + |
| + __ movdbl(value, Operand::StaticVariable(canonical_nan_reference)); |
| + __ bind(&have_value); |
| + } |
| + |
| + __ movdbl(double_store_operand, value); |
| + } else { |
| + // Can't use SSE2 in the serializer. |
| + if (instr->hydrogen()->IsConstantHoleStore()) { |
| + // This means we should store the (double) hole. No floating point |
| + // registers required. |
| + double nan_double = FixedDoubleArray::hole_nan_as_double(); |
| + uint64_t int_val = BitCast<uint64_t, double>(nan_double); |
| + int32_t lower = static_cast<int32_t>(int_val); |
| + int32_t upper = static_cast<int32_t>(int_val >> (kBitsPerInt)); |
| + |
| + __ mov(double_store_operand, Immediate(lower)); |
| + Operand double_store_operand2 = BuildFastArrayOperand( |
| + instr->elements(), |
| + instr->key(), |
| + instr->hydrogen()->key()->representation(), |
| + FAST_DOUBLE_ELEMENTS, |
| + FixedDoubleArray::kHeaderSize - kHeapObjectTag + kPointerSize, |
| + instr->additional_index()); |
| + __ mov(double_store_operand2, Immediate(upper)); |
| + } else { |
| + Label no_special_nan_handling; |
| + ASSERT(x87_stack_depth_ > 0); |
| + |
| + if (instr->NeedsCanonicalization()) { |
| + __ fld(0); |
| + __ fld(0); |
| + __ FCmp(); |
| + |
| + __ j(parity_odd, &no_special_nan_handling); |
| + __ sub(esp, Immediate(kDoubleSize)); |
| + __ fst_d(MemOperand(esp, 0)); |
| + __ cmp(MemOperand(esp, sizeof(kHoleNanLower32)), |
| + Immediate(kHoleNanUpper32)); |
| + __ add(esp, Immediate(kDoubleSize)); |
| + Label canonicalize; |
| + __ j(not_equal, &canonicalize); |
| + __ jmp(&no_special_nan_handling); |
| + __ bind(&canonicalize); |
| + __ fstp(0); |
| + __ fld_d(Operand::StaticVariable(canonical_nan_reference)); |
| + } |
| + |
| + __ bind(&no_special_nan_handling); |
| + __ fst_d(double_store_operand); |
| + } |
| + } |
| } |
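The non-SSE2 store above preserves the hole NaN (recognized by comparing the upper word against kHoleNanUpper32) and rewrites every other NaN to the canonical bit pattern before storing. A portable sketch of that rule; the kHoleNanUpper32 value below is an assumption for the sketch, not necessarily V8's actual constant:

#include <cmath>
#include <cstdint>
#include <cstring>

const uint32_t kHoleNanUpper32 = 0x7FF7FFFF;  // assumed value for this sketch

// Returns the bit pattern to store for v: the hole NaN is kept verbatim,
// any other NaN is replaced by the canonical NaN.
uint64_t CanonicalizeForStore(double v, uint64_t canonical_nan_bits) {
  uint64_t bits;
  std::memcpy(&bits, &v, sizeof bits);
  bool is_hole = static_cast<uint32_t>(bits >> 32) == kHoleNanUpper32;
  return (std::isnan(v) && !is_hole) ? canonical_nan_bits : bits;
}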
| void LCodeGen::DoStoreKeyedFixedArray(LStoreKeyed* instr) { |
| - Register value = ToRegister(instr->value()); |
| Register elements = ToRegister(instr->elements()); |
| Register key = instr->key()->IsRegister() ? ToRegister(instr->key()) : no_reg; |
| @@ -4389,9 +4530,22 @@ void LCodeGen::DoStoreKeyedFixedArray(LStoreKeyed* instr) { |
| FAST_ELEMENTS, |
| FixedArray::kHeaderSize - kHeapObjectTag, |
| instr->additional_index()); |
| - __ mov(operand, value); |
| + if (instr->value()->IsRegister()) { |
|
danno
2013/04/08 12:57:27
Is this change related to SSE2 at all?
mvstanton
2013/04/08 16:10:14
Nope, removed.
|
| + __ mov(operand, ToRegister(instr->value())); |
| + } else { |
| + LConstantOperand* operand_value = LConstantOperand::cast(instr->value()); |
| + if (IsInteger32(operand_value)) { |
| + int const_value = ToInteger32(operand_value); |
| + __ mov(operand, Immediate(const_value)); |
| + } else { |
| + Handle<Object> handle_value = ToHandle(operand_value); |
| + __ mov(operand, handle_value); |
| + } |
| + } |
| if (instr->hydrogen()->NeedsWriteBarrier()) { |
| + ASSERT(instr->value()->IsRegister()); |
|
danno
2013/04/08 12:57:27
Is this change related to SSE2 at all?
mvstanton
2013/04/08 16:10:14
Nosir, removed.
|
| + Register value = ToRegister(instr->value()); |
| ASSERT(!instr->key()->IsConstantOperand()); |
| HType type = instr->hydrogen()->value()->type(); |
| SmiCheck check_needed = |
| @@ -4805,9 +4959,6 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
| XMMRegister input_reg = ToDoubleRegister(instr->value()); |
| __ ucomisd(input_reg, input_reg); |
| } else { |
| - if (!IsX87TopOfStack(instr->value())) { |
| - __ fld_d(ToOperand(instr->value())); |
| - } |
| __ fld(0); |
| __ fld(0); |
| __ FCmp(); |
| @@ -4829,6 +4980,9 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
| __ j(not_equal, &canonicalize); |
| __ add(esp, Immediate(kDoubleSize)); |
| __ mov(reg, factory()->the_hole_value()); |
| + if (!use_sse2) { |
| + __ fstp(0); |
| + } |
| __ jmp(&done); |
| __ bind(&canonicalize); |
| __ add(esp, Immediate(kDoubleSize)); |
| @@ -4858,10 +5012,7 @@ void LCodeGen::DoNumberTagD(LNumberTagD* instr) { |
| XMMRegister input_reg = ToDoubleRegister(instr->value()); |
| __ movdbl(FieldOperand(reg, HeapNumber::kValueOffset), input_reg); |
| } else { |
| - if (!IsX87TopOfStack(instr->value())) { |
| - __ fld_d(ToOperand(instr->value())); |
| - } |
| - __ fstp_d(FieldOperand(reg, HeapNumber::kValueOffset)); |
| + __ fst_d(FieldOperand(reg, HeapNumber::kValueOffset)); |
| } |
| __ bind(&done); |
| } |
| @@ -4909,6 +5060,76 @@ void LCodeGen::DoSmiUntag(LSmiUntag* instr) { |
| } |
| +void LCodeGen::EmitNumberUntagDNoSSE2(Register input_reg, |
| + Register temp_reg, |
| + bool deoptimize_on_undefined, |
| + bool deoptimize_on_minus_zero, |
| + LEnvironment* env, |
| + NumberUntagDMode mode) { |
| + Label load_smi, done; |
| + |
| + if (mode == NUMBER_CANDIDATE_IS_ANY_TAGGED) { |
| + // Smi check. |
| + __ JumpIfSmi(input_reg, &load_smi, Label::kNear); |
| + |
| + // Heap number map check. |
| + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
| + factory()->heap_number_map()); |
| + if (deoptimize_on_undefined) { |
| + DeoptimizeIf(not_equal, env); |
| + } else { |
| + Label heap_number; |
| + __ j(equal, &heap_number, Label::kNear); |
| + |
| + __ cmp(input_reg, factory()->undefined_value()); |
| + DeoptimizeIf(not_equal, env); |
| + |
| + // Convert undefined to NaN. |
| + ExternalReference nan = |
| + ExternalReference::address_of_canonical_non_hole_nan(); |
| + __ fld_d(Operand::StaticVariable(nan)); |
| + __ jmp(&done, Label::kNear); |
| + __ bind(&heap_number); |
| + } |
| + // Heap number to XMM conversion. |
| + __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); |
| + if (deoptimize_on_minus_zero) { |
| + __ fldz(); |
| + __ FCmp(); |
| + __ fld_d(FieldOperand(input_reg, HeapNumber::kValueOffset)); |
| + __ j(not_zero, &done, Label::kNear); |
| + // TODO(mvstanton): the code to check for -0.0 on non-sse2 is not |
| + // complete, write it here. |
| + |
| + // Pop FPU stack before deoptimizing. |
| + __ fstp(0); |
| + DeoptimizeIf(not_zero, env); |
| + } |
| + __ jmp(&done, Label::kNear); |
| + } else if (mode == NUMBER_CANDIDATE_IS_SMI_OR_HOLE) { |
| + __ test(input_reg, Immediate(kSmiTagMask)); |
| + DeoptimizeIf(not_equal, env); |
| + } else if (mode == NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE) { |
| + __ test(input_reg, Immediate(kSmiTagMask)); |
| + __ j(zero, &load_smi); |
| + ExternalReference hole_nan_reference = |
| + ExternalReference::address_of_the_hole_nan(); |
| + __ fld_d(Operand::StaticVariable(hole_nan_reference)); |
| + __ jmp(&done, Label::kNear); |
| + } else { |
| + ASSERT(mode == NUMBER_CANDIDATE_IS_SMI); |
| + } |
| + |
| + __ bind(&load_smi); |
| + __ SmiUntag(input_reg); // Untag smi before converting to float. |
| + __ push(input_reg); |
| + __ fild_s(Operand(esp, 0)); |
| + __ pop(input_reg); |
| + __ SmiTag(input_reg); // Retag smi. |
| + __ bind(&done); |
| +} |
| + |
| + |
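The load_smi tail above untags the smi, converts it through the FPU with fild_s, and retags. In plain C++ the conversion amounts to the following (ia32 smis carry one tag bit; the arithmetic shift is a sketch-level stand-in for SmiUntag):

#include <cstdint>

// What the SmiUntag/fild_s sequence computes for a tagged smi input.
double SmiToDouble(int32_t tagged_smi) {
  int32_t untagged = tagged_smi >> 1;    // drop the one ia32 tag bit
  return static_cast<double>(untagged);  // fild_s performs this conversion
}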
| void LCodeGen::EmitNumberUntagD(Register input_reg, |
| Register temp_reg, |
| XMMRegister result_reg, |
| @@ -5021,7 +5242,7 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
| __ fisttp_d(Operand(esp, 0)); |
| __ mov(input_reg, Operand(esp, 0)); // Low word of answer is the result. |
| __ add(Operand(esp), Immediate(kDoubleSize)); |
| - } else { |
| + } else if (CpuFeatures::IsSupported(SSE2)) { |
| CpuFeatureScope scope(masm(), SSE2); |
| XMMRegister xmm_temp = ToDoubleRegister(instr->temp()); |
| __ movdbl(xmm0, FieldOperand(input_reg, HeapNumber::kValueOffset)); |
| @@ -5035,6 +5256,8 @@ void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
| __ ucomisd(xmm_temp, xmm0); |
| DeoptimizeIf(not_equal, instr->environment()); |
| DeoptimizeIf(parity_even, instr->environment()); // NaN. |
| + } else { |
| + UNREACHABLE(); |
| } |
| } else if (CpuFeatures::IsSupported(SSE2)) { |
| CpuFeatureScope scope(masm(), SSE2); |
| @@ -5079,18 +5302,169 @@ void LCodeGen::DoTaggedToI(LTaggedToI* instr) { |
| LOperand* input = instr->value(); |
| ASSERT(input->IsRegister()); |
| - ASSERT(input->Equals(instr->result())); |
| - |
| Register input_reg = ToRegister(input); |
| + ASSERT(input_reg.is(ToRegister(instr->result()))); |
| DeferredTaggedToI* deferred = new(zone()) DeferredTaggedToI(this, instr); |
| - // Smi check. |
| __ JumpIfNotSmi(input_reg, deferred->entry()); |
| + __ SmiUntag(input_reg); |
| + __ bind(deferred->exit()); |
| +} |
| - // Smi to int32 conversion |
| - __ SmiUntag(input_reg); // Untag smi. |
| +void LCodeGen::DoDeferredTaggedToINoSSE2(LTaggedToINoSSE2* instr) { |
| + Label done, heap_number; |
| + Register result_reg = ToRegister(instr->result()); |
| + Register input_reg = ToRegister(instr->value()); |
| + |
| + // Heap number map check. |
| + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
| + factory()->heap_number_map()); |
| + __ j(equal, &heap_number, Label::kNear); |
| + // Check for undefined. Undefined is converted to zero for truncating |
| + // conversions. |
| + __ cmp(input_reg, factory()->undefined_value()); |
| + __ RecordComment("Deferred TaggedToI: cannot truncate"); |
| + DeoptimizeIf(not_equal, instr->environment()); |
| + __ xor_(result_reg, result_reg); |
| + __ jmp(&done, Label::kFar); |
| + __ bind(&heap_number); |
| + |
| + // Surprisingly, all of this crazy bit manipulation is considerably |
| + // faster than using the built-in x86 CPU conversion functions (about 6x). |
| + Label right_exponent, adjust_bias, zero_result; |
| + Register scratch = ToRegister(instr->scratch()); |
| + Register scratch2 = ToRegister(instr->scratch2()); |
| + // Get exponent word. |
| + __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); |
| + // Get exponent alone in scratch2. |
| + __ mov(scratch2, scratch); |
| + __ and_(scratch2, HeapNumber::kExponentMask); |
| + __ shr(scratch2, HeapNumber::kExponentShift); |
| + if (instr->truncating()) { |
| + __ j(zero, &zero_result); |
| + } else { |
| + __ j(not_zero, &adjust_bias); |
| + __ test(scratch, Immediate(HeapNumber::kMantissaMask)); |
| + DeoptimizeIf(not_zero, instr->environment()); |
| + __ cmp(FieldOperand(input_reg, HeapNumber::kMantissaOffset), Immediate(0)); |
| + DeoptimizeIf(not_equal, instr->environment()); |
| + __ bind(&adjust_bias); |
| + } |
| + __ sub(scratch2, Immediate(HeapNumber::kExponentBias)); |
| + if (!instr->truncating()) { |
| + DeoptimizeIf(negative, instr->environment()); |
| + } else { |
| + __ j(negative, &zero_result); |
| + } |
| + |
| + // Get the second half of the double. For some exponents we don't |
| + // actually need this because the bits get shifted out again, but |
| + // it's probably slower to test than just to do it. |
| + Register scratch3 = ToRegister(instr->scratch3()); |
| + __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
| + __ xor_(result_reg, result_reg); |
| + |
| + const uint32_t non_int32_exponent = 31; |
| + __ cmp(scratch2, Immediate(non_int32_exponent)); |
| + // If we have a match of the int32 exponent then skip some logic. |
| + __ j(equal, &right_exponent, Label::kNear); |
| + // If the number doesn't fit in an int32, deopt. |
| + DeoptimizeIf(greater, instr->environment()); |
| + |
| + // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent |
| + // < 31. |
| + __ mov(result_reg, Immediate(31)); |
| + __ sub(result_reg, scratch2); |
| + |
| + __ bind(&right_exponent); |
| + |
| + // Save off exponent for negative check later. |
| + __ mov(scratch2, scratch); |
| + |
| + // Here result_reg is the shift, scratch is the exponent word. |
| + // Get the top bits of the mantissa. |
| + __ and_(scratch, HeapNumber::kMantissaMask); |
| + // Put back the implicit 1. |
| + __ or_(scratch, 1 << HeapNumber::kExponentShift); |
| + // Shift up the mantissa bits to take up the space the exponent used to |
| + // take. We have kExponentShift + 1 significant bits in the low end of the |
| + // word. Shift them to the top bits. |
| + const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 1; |
| + __ shl(scratch, shift_distance); |
| + if (!instr->truncating()) { |
| + // If not truncating, a non-zero value in the bottom 22 bits means a |
| + // non-integral value --> trigger a deopt. |
| + __ test(scratch3, Immediate((1 << (32 - shift_distance)) - 1)); |
| + DeoptimizeIf(not_equal, instr->environment()); |
| + } |
| + // Shift down 22 bits to get the most significant 10 bits of the low |
| + // mantissa word. |
| + __ shr(scratch3, 32 - shift_distance); |
| + __ or_(scratch3, scratch); |
| + if (!instr->truncating()) { |
| + // If not truncating, a non-zero value in the bits that will be shifted |
| + // away when adjusting the exponent means rounding --> deopt. |
| + __ mov(scratch, 0x1); |
| + ASSERT(result_reg.is(ecx)); |
| + __ shl_cl(scratch); |
| + __ dec(scratch); |
| + __ test(scratch3, scratch); |
| + DeoptimizeIf(not_equal, instr->environment()); |
| + } |
| + // Move down according to the exponent. |
| + ASSERT(result_reg.is(ecx)); |
| + __ shr_cl(scratch3); |
| + // Now the unsigned 32-bit answer is in scratch3. We need to move it to |
| + // result_reg and we may need to fix the sign. |
| + Label negative_result; |
| + __ xor_(result_reg, result_reg); |
| + __ cmp(scratch2, result_reg); |
| + __ j(less, &negative_result, Label::kNear); |
| + __ cmp(scratch3, result_reg); |
| + __ mov(result_reg, scratch3); |
| + // If the result is > MAX_INT, result doesn't fit in signed 32-bit --> deopt. |
| + DeoptimizeIf(less, instr->environment()); |
| + __ jmp(&done, Label::kNear); |
| + __ bind(&zero_result); |
| + __ xor_(result_reg, result_reg); |
| + __ jmp(&done, Label::kNear); |
| + __ bind(&negative_result); |
| + __ sub(result_reg, scratch3); |
| + if (!instr->truncating()) { |
| + // -0.0 triggers a deopt. |
| + DeoptimizeIf(zero, instr->environment()); |
| + } |
| + // If the negative subtraction overflows into a positive number, there was an |
| + // overflow --> deopt. |
| + DeoptimizeIf(positive, instr->environment()); |
| + __ bind(&done); |
| +} |
| + |
| + |
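The deferred conversion above extracts the exponent and mantissa by hand rather than using FPU instructions. A portable sketch of the truncating case under the IEEE-754 double layout; the function name is illustrative, and out-of-range inputs simply return 0 here where the real code deoptimizes or handles them separately:

#include <cstdint>
#include <cstring>

int32_t TruncateToInt32(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  int exponent = static_cast<int>((bits >> 52) & 0x7FF) - 1023;
  if (exponent < 0 || exponent > 30) return 0;  // |d| < 1 or out of range
  // Restore the implicit leading 1, then shift the mantissa into place.
  uint64_t mantissa = (bits & ((uint64_t{1} << 52) - 1)) | (uint64_t{1} << 52);
  uint32_t magnitude = static_cast<uint32_t>(mantissa >> (52 - exponent));
  return (bits >> 63) ? -static_cast<int32_t>(magnitude)
                      : static_cast<int32_t>(magnitude);
}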
| +void LCodeGen::DoTaggedToINoSSE2(LTaggedToINoSSE2* instr) { |
| + class DeferredTaggedToINoSSE2: public LDeferredCode { |
| + public: |
| + DeferredTaggedToINoSSE2(LCodeGen* codegen, LTaggedToINoSSE2* instr) |
| + : LDeferredCode(codegen), instr_(instr) { } |
| + virtual void Generate() { codegen()->DoDeferredTaggedToINoSSE2(instr_); } |
| + virtual LInstruction* instr() { return instr_; } |
| + private: |
| + LTaggedToINoSSE2* instr_; |
| + }; |
| + |
| + LOperand* input = instr->value(); |
| + ASSERT(input->IsRegister()); |
| + Register input_reg = ToRegister(input); |
| + ASSERT(input_reg.is(ToRegister(instr->result()))); |
| + |
| + DeferredTaggedToINoSSE2* deferred = |
| + new(zone()) DeferredTaggedToINoSSE2(this, instr); |
| + |
| + // Smi check. |
| + __ JumpIfNotSmi(input_reg, deferred->entry()); |
| + __ SmiUntag(input_reg); // Untag smi. |
| __ bind(deferred->exit()); |
| } |
| @@ -5103,32 +5477,31 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { |
| LOperand* result = instr->result(); |
| ASSERT(result->IsDoubleRegister()); |
| - if (CpuFeatures::IsSupported(SSE2)) { |
| - CpuFeatureScope scope(masm(), SSE2); |
| - Register input_reg = ToRegister(input); |
| - XMMRegister result_reg = ToDoubleRegister(result); |
| - |
| - bool deoptimize_on_minus_zero = |
| - instr->hydrogen()->deoptimize_on_minus_zero(); |
| - Register temp_reg = deoptimize_on_minus_zero ? ToRegister(temp) : no_reg; |
| - |
| - NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; |
| - HValue* value = instr->hydrogen()->value(); |
| - if (value->type().IsSmi()) { |
| - if (value->IsLoadKeyed()) { |
| - HLoadKeyed* load = HLoadKeyed::cast(value); |
| - if (load->UsesMustHandleHole()) { |
| - if (load->hole_mode() == ALLOW_RETURN_HOLE) { |
| - mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; |
| - } else { |
| - mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; |
| - } |
| + Register input_reg = ToRegister(input); |
| + bool deoptimize_on_minus_zero = |
| + instr->hydrogen()->deoptimize_on_minus_zero(); |
| + Register temp_reg = deoptimize_on_minus_zero ? ToRegister(temp) : no_reg; |
| + |
| + NumberUntagDMode mode = NUMBER_CANDIDATE_IS_ANY_TAGGED; |
| + HValue* value = instr->hydrogen()->value(); |
| + if (value->type().IsSmi()) { |
| + if (value->IsLoadKeyed()) { |
| + HLoadKeyed* load = HLoadKeyed::cast(value); |
| + if (load->UsesMustHandleHole()) { |
| + if (load->hole_mode() == ALLOW_RETURN_HOLE) { |
| + mode = NUMBER_CANDIDATE_IS_SMI_CONVERT_HOLE; |
| } else { |
| - mode = NUMBER_CANDIDATE_IS_SMI; |
| + mode = NUMBER_CANDIDATE_IS_SMI_OR_HOLE; |
| } |
| + } else { |
| + mode = NUMBER_CANDIDATE_IS_SMI; |
| } |
| } |
| + } |
| + if (CpuFeatures::IsSupported(SSE2)) { |
| + CpuFeatureScope scope(masm(), SSE2); |
| + XMMRegister result_reg = ToDoubleRegister(result); |
| EmitNumberUntagD(input_reg, |
| temp_reg, |
| result_reg, |
| @@ -5137,7 +5510,13 @@ void LCodeGen::DoNumberUntagD(LNumberUntagD* instr) { |
| instr->environment(), |
| mode); |
| } else { |
| - UNIMPLEMENTED(); |
| + EmitNumberUntagDNoSSE2(input_reg, |
| + temp_reg, |
| + instr->hydrogen()->deoptimize_on_undefined(), |
| + deoptimize_on_minus_zero, |
| + instr->environment(), |
| + mode); |
| + MarkReturnX87Result(); |
| } |
| } |
| @@ -5409,7 +5788,128 @@ void LCodeGen::DoClampTToUint8(LClampTToUint8* instr) { |
| __ bind(&is_smi); |
| __ SmiUntag(input_reg); |
| __ ClampUint8(input_reg); |
| + __ bind(&done); |
| +} |
| + |
| + |
| +void LCodeGen::DoClampTToUint8NoSSE2(LClampTToUint8NoSSE2* instr) { |
| + Register input_reg = ToRegister(instr->unclamped()); |
| + Register result_reg = ToRegister(instr->result()); |
| + Register scratch = ToRegister(instr->scratch()); |
| + Register scratch2 = ToRegister(instr->scratch2()); |
| + Register scratch3 = ToRegister(instr->scratch3()); |
| + Label is_smi, done, heap_number, valid_exponent, |
| + largest_value, zero_result, maybe_nan_or_infinity; |
| + |
| + __ JumpIfSmi(input_reg, &is_smi); |
| + |
| + // Check for heap number |
| + __ cmp(FieldOperand(input_reg, HeapObject::kMapOffset), |
| + factory()->heap_number_map()); |
| + __ j(equal, &heap_number, Label::kFar); |
| + |
| + // Check for undefined. Undefined is converted to zero for clamping |
| + // conversions. |
| + __ cmp(input_reg, factory()->undefined_value()); |
| + DeoptimizeIf(not_equal, instr->environment()); |
| + __ jmp(&zero_result); |
| + |
| + // Heap number |
| + __ bind(&heap_number); |
| + |
| + // Surprisingly, all of the hand-crafted bit-manipulations below are much |
| + // faster than the x86 FPU built-in instruction, especially since "banker's |
| + // rounding" would be additionally very expensive |
| + |
| + // Get exponent word. |
| + __ mov(scratch, FieldOperand(input_reg, HeapNumber::kExponentOffset)); |
| + __ mov(scratch3, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
| + |
| + // Test for negative values --> clamp to zero |
| + __ test(scratch, scratch); |
| + __ j(negative, &zero_result); |
| + |
| + // Get exponent alone in scratch2. |
| + __ mov(scratch2, scratch); |
| + __ and_(scratch2, HeapNumber::kExponentMask); |
| + __ shr(scratch2, HeapNumber::kExponentShift); |
| + __ j(zero, &zero_result); |
| + __ sub(scratch2, Immediate(HeapNumber::kExponentBias - 1)); |
| + __ j(negative, &zero_result); |
| + |
| + const uint32_t non_int8_exponent = 7; |
| + __ cmp(scratch2, Immediate(non_int8_exponent + 1)); |
| + // If the exponent is too big, check for special values. |
| + __ j(greater, &maybe_nan_or_infinity, Label::kNear); |
| + |
| + __ bind(&valid_exponent); |
| + // Exponent word in scratch, exponent in scratch2. We know that 0 <= exponent |
| + // < 7. The shift bias is the number of bits to shift the mantissa such |
| + // that, with an exponent of 7, the top-most one is in bit 30, allowing |
| + // detection of the rounding overflow of 255.5 to 256 (bit 31 goes from |
| + // 0 to 1). |
| + int shift_bias = (30 - HeapNumber::kExponentShift) - 7 - 1; |
| + __ lea(result_reg, MemOperand(scratch2, shift_bias)); |
| + // Here result_reg (ecx) is the shift, scratch is the exponent word. Get the |
| + // top bits of the mantissa. |
| + __ and_(scratch, HeapNumber::kMantissaMask); |
| + // Put back the implicit 1 of the mantissa |
| + __ or_(scratch, 1 << HeapNumber::kExponentShift); |
| + // Shift up to round |
| + __ shl_cl(scratch); |
| + // Use "banker's rounding" to spec: If fractional part of number is 0.5, then |
| + // use the bit in the "ones" place and add it to the "halves" place, which has |
| + // the effect of rounding to even. |
| + __ mov(scratch2, scratch); |
| + const uint32_t one_half_bit_shift = 30 - sizeof(uint8_t) * 8; |
| + const uint32_t one_bit_shift = one_half_bit_shift + 1; |
| + __ and_(scratch2, Immediate((1 << one_bit_shift) - 1)); |
| + __ cmp(scratch2, Immediate(1 << one_half_bit_shift)); |
| + Label no_round; |
| + __ j(less, &no_round); |
| + Label round_up; |
| + __ mov(scratch2, Immediate(1 << one_half_bit_shift)); |
| + __ j(greater, &round_up); |
| + __ test(scratch3, scratch3); |
| + __ j(not_zero, &round_up); |
| + __ mov(scratch2, scratch); |
| + __ and_(scratch2, Immediate(1 << one_bit_shift)); |
| + __ shr(scratch2, 1); |
| + __ bind(&round_up); |
| + __ add(scratch, scratch2); |
| + __ j(overflow, &largest_value); |
| + __ bind(&no_round); |
| + __ shr(scratch, 23); |
| + __ mov(result_reg, scratch); |
| + __ jmp(&done, Label::kNear); |
| + |
| + __ bind(&maybe_nan_or_infinity); |
| + // Check for NaN/Infinity, all other values map to 255 |
| + __ cmp(scratch2, Immediate(HeapNumber::kInfinityOrNanExponent + 1)); |
| + __ j(not_equal, &largest_value, Label::kNear); |
| + // Check for NaN, which differs from Infinity in that at least one mantissa |
| + // bit is set. |
| + __ and_(scratch, HeapNumber::kMantissaMask); |
| + __ or_(scratch, FieldOperand(input_reg, HeapNumber::kMantissaOffset)); |
| + __ j(not_zero, &zero_result); // M!=0 --> NaN |
| + // Infinity -> Fall through to map to 255. |
| + |
| + __ bind(&largest_value); |
| + __ mov(result_reg, Immediate(255)); |
| + __ jmp(&done, Label::kNear); |
| + |
| + __ bind(&zero_result); |
| + __ xor_(result_reg, result_reg); |
| + __ jmp(&done); |
| + |
| + // smi |
| + __ bind(&is_smi); |
| + if (!input_reg.is(result_reg)) { |
| + __ mov(result_reg, input_reg); |
| + } |
| + __ SmiUntag(result_reg); |
| + __ ClampUint8(result_reg); |
| __ bind(&done); |
| } |
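For comparison, the clamp above — including its NaN-to-zero and round-half-to-even behavior — can be written with library calls. A sketch assuming the default FE_TONEAREST rounding mode (function name illustrative):

#include <cmath>
#include <cstdint>

// Clamp a double to [0, 255] the way the hand-rolled code does:
// NaN and negative values map to 0, ties round to even.
uint8_t ClampToUint8(double d) {
  if (std::isnan(d) || d <= 0.0) return 0;
  if (d >= 255.0) return 255;
  return static_cast<uint8_t>(std::nearbyint(d));  // round-half-to-even
}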