Chromium Code Reviews| Index: src/arm/lithium-codegen-arm.cc |
| =================================================================== |
| --- src/arm/lithium-codegen-arm.cc (revision 7092) |
| +++ src/arm/lithium-codegen-arm.cc (working copy) |
| @@ -2716,6 +2716,188 @@ |
| } |
| +void LCodeGen::TryVcvtTruncation(Register result, |
| + DwVfpRegister double_input, |
| + Register saved_fpscr, |
| + Register current_fpscr, |
| + SwVfpRegister single_scratch, |
| + TruncationType type, |
| + Label* success) { |
| + // Install a copy of the saved FPSCR with the kVFPExceptionMask and kVFPFlushToZeroMask bits cleared, so the exception test below only sees this conversion. |
| + __ bic(current_fpscr, saved_fpscr, Operand(kVFPExceptionMask | |
| + kVFPFlushToZeroMask)); |
| + __ vmsr(current_fpscr); |
| + // Emit a vfp double to 32-bit integer truncation, signed or unsigned as |
| + // selected by 'type', using the default 'round to zero' mode. |
| + if (type == kSignedTruncation) { |
| + __ vcvt_s32_f64(single_scratch, double_input); |
| + } else { |
| + __ vcvt_u32_f64(single_scratch, double_input); |
| + } |
| + |
| + // Read the FPSCR back and test the bits in kVFPExceptionMask to see whether the conversion raised a vfp exception. |
| + __ vmrs(current_fpscr); |
| + __ tst(current_fpscr, Operand(kVFPExceptionMask)); |
| + // Move the converted value into 'result'. The branch at the end still relies on the condition flags set by the tst above. |
| + __ vmov(result, single_scratch); |
| + // Restore the saved FPSCR before branching, so it is restored on both the success and the fall-through path. |
| + __ vmsr(saved_fpscr); |
| + // If no vfp exceptions were raised, branch to 'success'. Otherwise fall through ('result' has still been written). |
| + __ b(eq, success); |
| +} |
| + |
| + |
| +// The truncation process is: |
| +// 1: Try to truncate using VFP floating-point to integer vcvt instructions. |
| +// a: Try to truncate to a signed int. |
| +// b: If that fails, try to truncate to an unsigned int. |
| +// 2: If that fails, try to bring back the input value in the 32bit int range. |
| +// If we succeed jump backward to let vcvt instructions truncate the value. |
| +// 3: If we could not bring back the value to the int32 range, check for special |
| +// cases. |
| +// 4: If that also fails, fall through. The following code should handle the |
| +// failure, probably by deoptimizing. |
| +void LCodeGen::EmitECMATruncate(Register result, |

Karl Klose
2011/03/09 10:37:51
As fschneider suggested, it would be good to move
Alexandre
2011/03/15 08:45:39
I first moved everything to a stub. Then I refact
|
| + Register scratch1, |
| + Register scratch2, |
| + DwVfpRegister double_input, |
| + DwVfpRegister double_scratch1, |
| + DwVfpRegister double_scratch2, |
| + Label* done) { |
| + ASSERT(!scratch1.is(result)); |
| + ASSERT(!scratch2.is(result)); |
| + ASSERT(!scratch1.is(scratch2)); |
| + ASSERT(!double_scratch1.is(double_input)); |
| + ASSERT(!double_scratch2.is(double_input)); |
| + ASSERT(!double_scratch1.is(double_scratch2)); |
| + |
| + Register prev_fpscr = scratch1; |
| + Register curr_fpscr = scratch2; |
| + scratch1 = no_reg; |
| + scratch2 = no_reg; |
| + |
| + SwVfpRegister single_scratch = double_scratch2.low(); |
| + |
| + Label retry, check_special_cases; |
| + |
| + // Save the current FPSCR once, before 'retry', so every attempt below can restore it. |
| + __ vmrs(prev_fpscr); |
| + __ bind(&retry); |
| + |
| + // Try standard vfp floating-point to integer truncations, using the |
| + // default 'round to zero' mode. Signed first; each helper call branches to 'done' on success. |

Søren Thygesen Gjesse
2011/03/08 16:15:03
Drive-by:
How fast is the VFP rounding? Maybe just
Karl Klose
2011/03/09 10:37:51
We should measure later, if bit-fiddeling code as
Alexandre
2011/03/15 08:45:39
I initially thought that the vfp would be faster.
|
| + TryVcvtTruncation(result, |
| + double_input, |
| + prev_fpscr, |
| + curr_fpscr, |
| + single_scratch, |
| + kSignedTruncation, |
| + done); |
| + |
| + // Exceptions were raised. Try an unsigned conversion. |
| + TryVcvtTruncation(result, |
| + double_input, |
| + prev_fpscr, |
| + curr_fpscr, |
| + single_scratch, |
| + kUnsignedTruncation, |
| + done); |
| + |
| + |
| + // Standard conversion did not work. Try to handle manually. |
| + |
| + // Clear the vfp cumulative exception flags, so the exception test after the range reduction below only sees exceptions raised by it. |
| + __ bic(curr_fpscr, curr_fpscr, Operand(kVFPExceptionMask)); |
| + __ vmsr(curr_fpscr); |
| + |
| + // The truncating conversion is invariant modulo 2^32. |
| + // If we are lucky, we can easily bring the input value to the |
| + // [-2^32, 2^32] range. |
| + Label positive, in_two_31_range; |
| + const double two_31_value = 2147483648.0; |

Karl Klose
2011/03/09 10:37:51
Constants should be formatted as follows: kTwo31Va
Alexandre
2011/03/15 08:45:39
Done.
|
| + const double two_32_value = 4294967296.0; |
| + // Start bringing the input value to the [-2^32, 2^32] range: compute trunc(input / 2^32) * 2^32 in double_scratch1. |
| + DwVfpRegister two_32 = double_scratch2; |
| + __ vmov(two_32, two_32_value); |
| + __ vdiv(double_scratch1, double_input, two_32); |
| + __ vcvt_s32_f64(double_scratch1.low(), double_scratch1); |
| + __ vcvt_f64_s32(double_scratch1, double_scratch1.low()); |
| + __ vmul(double_scratch1, double_scratch1, two_32); |
| + // Test for vfp exceptions. |
| + __ vmrs(curr_fpscr); |
| + __ tst(curr_fpscr, Operand(kVFPExceptionMask)); |
| + // The following code won't work if vfp exceptions were raised. |
| + // (Overflow is raised for high values, infinity. Invalid exception for NaN.) |
| + __ b(ne, &check_special_cases); |
| + // Perform the subtraction after the branch to preserve the input for the special-case checks, which read double_input. |
| + __ vsub(double_input, double_input, double_scratch1); |
| + |
| + // double_input: value brought back to [-2^32, 2^32]. |
| + |
| + // Get the value rounded toward 0. First compare |double_input| against 2^31 (two_31 reuses double_scratch2). |
| + DwVfpRegister two_31 = double_scratch2; |
| + __ vabs(double_scratch1, double_input); |
| + __ vmov(two_31, two_31_value); |
| + __ vcmp(double_scratch1, two_31); |
| + __ vmrs(pc); |
| + __ b(lt, &in_two_31_range); |
| + |
| + // The value is in the [-2^32, -2^31] U [2^31, 2^32] range. |
| + // Add or subtrct 2^31 to easily round it toward zero. |

Karl Klose
2011/03/09 10:37:51
subtrct -> subtract.
Alexandre
2011/03/15 08:45:39
Done.
|
| + // Push negative values below -2^31 to the positive range to let vcvt_u32_f64 |
| + // handle the conversion. (For negative value we add 2^31 to easily round, |
| + // then add 2^31 again instead of subtracting. This works because the |
| + // operation is invariant modulo 2^32.) |
| + __ vcmp(double_input, 0.0); |
| + __ vmrs(pc); |
| + __ vadd(double_input, double_input, two_31, lt); |
| + __ vsub(double_input, double_input, two_31, ge); |
| + __ vcvt_s32_f64(double_input.low(), double_input); |

Karl Klose
2011/03/09 10:37:51
Should this code not use vcvt_u32_f64 as stated in
Alexandre
2011/03/15 08:45:39
No it should not. I updated the comment before to
|
| + __ vcvt_f64_s32(double_input, double_input.low()); |
| + __ vadd(double_input, double_input, two_31); |
| + __ b(&retry); |
| + |
| + __ bind(&in_two_31_range); |
| + // Round the value toward zero and jump back to let the standard |
| + // code handle the conversion. |
| + __ vcvt_s32_f64(double_input.low(), double_input); |
| + __ vcvt_f64_s32(double_input, double_input.low()); |
| + __ b(&retry); |
| + |
| + // We never fall through to here: both paths above end with an |
| + // unconditional branch back to 'retry', where a successful conversion jumps to 'done'. |
| + if (FLAG_debug_code) { |
| + __ Abort("We should never fall through."); |
| + } |
| + |
| + // Check for a high exponent, infinity, and NaN, which should all return 0. |
| + // * If the unbiased exponent is greater than 52 + 32 = 84 then all mantissa |
| + // bits are shifted out of the 32bit integer range and the result is 0. |
| + // * NaN and Infinity have an exponent of 0x7ff, so the test below will also |
| + // detect them. NOTE(review): the branch to 'done' below is taken before the vmsr(prev_fpscr) at the end, so on that path the FPSCR stays as last written above - confirm this is intended. |
| + |
| + __ bind(&check_special_cases); |
| + |
| + scratch2 = curr_fpscr; |
| + curr_fpscr = no_reg; |
| + |
| + // Get exponent alone in scratch2 (read from the high word of double_input, which was not modified on this path). |
| + __ vmov(scratch2, double_input.high()); |
| + __ Ubfx(scratch2, |
| + scratch2, |
| + HeapNumber::kExponentShift, |
| + HeapNumber::kExponentBits); |
| + const int32_t big_exp = 84; |
| + __ cmp(scratch2, Operand(HeapNumber::kExponentBias + big_exp)); |
| + __ mov(result, Operand(0)); |
| + __ b(ge, done); |
| + |
| + // We could not handle the truncation manually ('result' has been set to 0 above and must not be used by the caller). |
| + // Restore the FPSCR and fall through. |
| + __ vmsr(prev_fpscr); |
| +} |
| + |
| + |
| void LCodeGen::DoMathLog(LUnaryMathOperation* instr) { |
| ASSERT(ToDoubleRegister(instr->result()).is(d2)); |
| TranscendentalCacheStub stub(TranscendentalCache::LOG, |
| @@ -3281,20 +3463,33 @@ |
| void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
| - Label done; |
| Register input_reg = ToRegister(instr->InputAt(0)); |
| - Register scratch = scratch0(); |
| - DoubleRegister dbl_scratch = d0; |
| - SwVfpRegister flt_scratch = s0; |
| - DoubleRegister dbl_tmp = ToDoubleRegister(instr->TempAt(0)); |
| + Register scratch1 = scratch0(); |
| + Register scratch2 = ToRegister(instr->TempAt(0)); |
| + SwVfpRegister single_scratch = s0; |
| + DwVfpRegister double_scratch1 = d0; |
| + DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1)); |
| + DwVfpRegister double_scratch3 = ToDoubleRegister(instr->TempAt(2)); |
| + ASSERT(!scratch1.is(input_reg)); |
| + ASSERT(!scratch2.is(input_reg)); |
| + ASSERT(!scratch2.is(scratch1)); |
| + ASSERT(!double_scratch1.is(double_scratch2)); |
| + ASSERT(!double_scratch1.is(double_scratch3)); |
| + ASSERT(!double_scratch2.is(double_scratch3)); |
| + |
| + Label done; |
| + |
| // Heap number map check. |
| - __ ldr(scratch, FieldMemOperand(input_reg, HeapObject::kMapOffset)); |
| + __ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset)); |
| __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex); |
| - __ cmp(scratch, Operand(ip)); |
| + __ cmp(scratch1, Operand(ip)); |
| + CpuFeatures::Scope scope(VFP3); |
| if (instr->truncating()) { |
| - Label heap_number; |
| + // Performs a truncating conversion of a floating point number as used by |
| + // the JS bitwise operations. |
| + Label heap_number, success; |
| __ b(eq, &heap_number); |
| // Check for undefined. Undefined is converted to zero for truncating |
| // conversions. |
| @@ -3305,36 +3500,42 @@ |
| __ b(&done); |
| __ bind(&heap_number); |
| - __ sub(ip, input_reg, Operand(kHeapObjectTag)); |
| - __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); |
| - __ vcmp(dbl_tmp, 0.0); // Sets overflow bit in FPSCR flags if NaN. |
| - __ vcvt_s32_f64(flt_scratch, dbl_tmp); |
| - __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. |
| - __ vmrs(pc); // Move vector status bits to normal status bits. |
| - // Overflow bit is set if dbl_tmp is Nan. |
| - __ cmn(input_reg, Operand(1), vc); // 0x7fffffff + 1 -> overflow. |
| - __ cmp(input_reg, Operand(1), vc); // 0x80000000 - 1 -> overflow. |
| - DeoptimizeIf(vs, instr->environment()); // Saturation may have occured. |
| + DwVfpRegister double_value = double_scratch3; |
| + double_scratch3 = no_dreg; |
| + __ sub(scratch1, input_reg, Operand(kHeapObjectTag)); |
| + __ vldr(double_value, scratch1, HeapNumber::kValueOffset); |
| + EmitECMATruncate(input_reg, |
| + scratch1, |
| + scratch2, |
| + double_value, |
| + double_scratch1, |
| + double_scratch2, |
| + &success); |
| + DeoptimizeIf(al, instr->environment()); |
| + __ bind(&success); |
| + |
| } else { |
| // Deoptimize if we don't have a heap number. |
| DeoptimizeIf(ne, instr->environment()); |
| __ sub(ip, input_reg, Operand(kHeapObjectTag)); |
| - __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); |
| - __ vcvt_s32_f64(flt_scratch, dbl_tmp); |
| - __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. |
| - // Non-truncating conversion means that we cannot lose bits, so we convert |
| - // back to check; note that using non-overlapping s and d regs would be |
| - // slightly faster. |
| - __ vcvt_f64_s32(dbl_scratch, flt_scratch); |
| - __ VFPCompareAndSetFlags(dbl_scratch, dbl_tmp); |
| - DeoptimizeIf(ne, instr->environment()); // Not equal or unordered. |
| + __ vldr(double_scratch1, ip, HeapNumber::kValueOffset); |
| + __ EmitVFPTruncate(kRoundToZero, |
| + single_scratch, |
| + double_scratch1, |
| + scratch1, |
| + scratch2, |
| + kCheckForInexactConversion); |
| + DeoptimizeIf(ne, instr->environment()); |
| + // Load the result. |
| + __ vmov(input_reg, single_scratch); |
| + |
| if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { |
| - __ tst(input_reg, Operand(input_reg)); |
| + __ cmp(input_reg, Operand(0)); |
| __ b(ne, &done); |
| - __ vmov(lr, ip, dbl_tmp); |
| - __ tst(ip, Operand(1 << 31)); // Test sign bit. |
| + __ vmov(scratch1, double_scratch1.high()); |
| + __ tst(scratch1, Operand(HeapNumber::kSignMask)); |
| DeoptimizeIf(ne, instr->environment()); |
| } |
| } |
| @@ -3377,47 +3578,46 @@ |
| void LCodeGen::DoDoubleToI(LDoubleToI* instr) { |
| LOperand* input = instr->InputAt(0); |
| + LOperand* result = instr->result(); |
| ASSERT(input->IsDoubleRegister()); |
| - LOperand* result = instr->result(); |
| ASSERT(result->IsRegister()); |
| - DoubleRegister double_input = ToDoubleRegister(input); |
| Register result_reg = ToRegister(result); |
| - SwVfpRegister single_scratch = double_scratch0().low(); |
| Register scratch1 = scratch0(); |
| Register scratch2 = ToRegister(instr->TempAt(0)); |
| + DwVfpRegister double_input = ToDoubleRegister(input); |
| + DwVfpRegister double_scratch1 = double_scratch0(); |
| + DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1)); |
| + SwVfpRegister single_scratch = double_scratch0().low(); |
| - __ EmitVFPTruncate(kRoundToZero, |
| - single_scratch, |
| + Label done; |
| + |
| + if (instr->truncating()) { |
| + Label success; |
| + EmitECMATruncate(result_reg, |
| + scratch1, |
| + scratch2, |
| double_input, |
| - scratch1, |
| - scratch2); |
| - |
| - // Deoptimize if we had a vfp invalid exception. |
| - DeoptimizeIf(ne, instr->environment()); |
| - |
| - // Retrieve the result. |
| - __ vmov(result_reg, single_scratch); |
| - |
| - if (!instr->truncating()) { |
| - // Convert result back to double and compare with input |
| - // to check if the conversion was exact. |
| - __ vmov(single_scratch, result_reg); |
| - __ vcvt_f64_s32(double_scratch0(), single_scratch); |
| - __ VFPCompareAndSetFlags(double_scratch0(), double_input); |
| + double_scratch1, |
| + double_scratch2, |
| + &success); |
| + DeoptimizeIf(al, instr->environment()); |
| + __ bind(&success); |
| + } else { |
| + VFPRoundingMode rounding_mode = kRoundToMinusInf; |
| + __ EmitVFPTruncate(rounding_mode, |
| + single_scratch, |
| + double_input, |
| + scratch1, |
| + scratch2, |
| + kCheckForInexactConversion); |
| + // Deoptimize if we had a vfp invalid exception, |
| + // including inexact operation. |
| DeoptimizeIf(ne, instr->environment()); |
| - if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { |
| - Label done; |
| - __ cmp(result_reg, Operand(0)); |
| - __ b(ne, &done); |
| - // Check for -0. |
| - __ vmov(scratch1, double_input.high()); |
| - __ tst(scratch1, Operand(HeapNumber::kSignMask)); |
| - DeoptimizeIf(ne, instr->environment()); |
| - |
| - __ bind(&done); |
| - } |
| + // Retrieve the result. |
| + __ vmov(result_reg, single_scratch); |
| } |
| + __ bind(&done); |
| } |