Index: src/arm/lithium-codegen-arm.cc |
=================================================================== |
--- src/arm/lithium-codegen-arm.cc (revision 7092) |
+++ src/arm/lithium-codegen-arm.cc (working copy) |
@@ -2716,6 +2716,188 @@ |
} |
+void LCodeGen::TryVcvtTruncation(Register result, |
+ DwVfpRegister double_input, |
+ Register saved_fpscr, |
+ Register current_fpscr, |
+ SwVfpRegister single_scratch, |
+ TruncationType type, |
+ Label* success) { |
+ // Clear the cumulative exception flags and the flush-to-zero bit. |
+ __ bic(current_fpscr, saved_fpscr, Operand(kVFPExceptionMask | |
+ kVFPFlushToZeroMask)); |
+ __ vmsr(current_fpscr); |
+ // Try a standard vfp floating-point to integer truncation, using the |
+ // default 'round to zero' mode. |
+ if (type == kSignedTruncation) { |
+ __ vcvt_s32_f64(single_scratch, double_input); |
+ } else { |
+ __ vcvt_u32_f64(single_scratch, double_input); |
+ } |
+ |
+ // Retrieve FPSCR and check for vfp exceptions. |
+ __ vmrs(current_fpscr); |
+ __ tst(current_fpscr, Operand(kVFPExceptionMask)); |
+ // Load the result. |
+ __ vmov(result, single_scratch); |
+ // Restore the saved FPSCR. |
+ __ vmsr(saved_fpscr); |
+ // If no vfp exceptions were raised we are done. Otherwise fall through. |
+ __ b(eq, success); |
+} |
+ |
+ |
+// The truncation process is: |
+// 1: Try to truncate using VFP floating-point to integer vcvt instructions. |
+// a: Try to truncate to a signed int. |
+// b: If that fails, try to truncate to an unsigned int. |
+// 2: If that fails, try to bring back the input value in the 32bit int range. |
+// If we succeed jump backward to let vcvt instructions truncate the value. |
+// 3: If we could not bring back the value to the int32 range, check for special |
+// cases. |
+// 4: If that also fails, fall through. The following code should handle the |
+// failure, probably by deoptimizing. |
+void LCodeGen::EmitECMATruncate(Register result, |
Karl Klose
2011/03/09 10:37:51
As fschneider suggested, it would be good to move
Alexandre
2011/03/15 08:45:39
I first moved everything to a stub. Then I refact
|
+ Register scratch1, |
+ Register scratch2, |
+ DwVfpRegister double_input, |
+ DwVfpRegister double_scratch1, |
+ DwVfpRegister double_scratch2, |
+ Label* done) { |
+ ASSERT(!scratch1.is(result)); |
+ ASSERT(!scratch2.is(result)); |
+ ASSERT(!scratch1.is(scratch2)); |
+ ASSERT(!double_scratch1.is(double_input)); |
+ ASSERT(!double_scratch2.is(double_input)); |
+ ASSERT(!double_scratch1.is(double_scratch2)); |
+ |
+ Register prev_fpscr = scratch1; |
+ Register curr_fpscr = scratch2; |
+ scratch1 = no_reg; |
+ scratch2 = no_reg; |
+ |
+ SwVfpRegister single_scratch = double_scratch2.low(); |
+ |
+ Label retry, check_special_cases; |
+ |
+ // Save the current FPSCR. |
+ __ vmrs(prev_fpscr); |
+ __ bind(&retry); |
+ |
+ // Try standard vfp floating-point to integer truncations, using the |
+ // default 'round to zero' mode. |
Søren Thygesen Gjesse
2011/03/08 16:15:03
Drive-by:
How fast is the VFP rounding? Maybe just
Karl Klose
2011/03/09 10:37:51
We should measure later, if bit-fiddling code as
Alexandre
2011/03/15 08:45:39
I initially thought that the vfp would be faster.
|
+ TryVcvtTruncation(result, |
+ double_input, |
+ prev_fpscr, |
+ curr_fpscr, |
+ single_scratch, |
+ kSignedTruncation, |
+ done); |
+ |
+ // Exceptions were raised. Try an unsigned conversion. |
+ TryVcvtTruncation(result, |
+ double_input, |
+ prev_fpscr, |
+ curr_fpscr, |
+ single_scratch, |
+ kUnsignedTruncation, |
+ done); |
+ |
+ |
+ // Standard conversion did not work. Try to handle manually. |
+ |
+ // Clear vfp cumulative exception flags. |
+ __ bic(curr_fpscr, curr_fpscr, Operand(kVFPExceptionMask)); |
+ __ vmsr(curr_fpscr); |
+ |
+ // The truncating conversion is invariant modulo 2^32. |
+ // If we are lucky, we can easily bring the input value to the |
+ // [-2^32, 2^32] range. |
+ Label positive, in_two_31_range; |
+ const double two_31_value = 2147483648.0; |
Karl Klose
2011/03/09 10:37:51
Constants should be formatted as follows: kTwo31Va
Alexandre
2011/03/15 08:45:39
Done.
|
+ const double two_32_value = 4294967296.0; |
+ // Start bringing the input value to the [-2^32, 2^32] range. |
+ DwVfpRegister two_32 = double_scratch2; |
+ __ vmov(two_32, two_32_value); |
+ __ vdiv(double_scratch1, double_input, two_32); |
+ __ vcvt_s32_f64(double_scratch1.low(), double_scratch1); |
+ __ vcvt_f64_s32(double_scratch1, double_scratch1.low()); |
+ __ vmul(double_scratch1, double_scratch1, two_32); |
+ // Test for vfp exceptions. |
+ __ vmrs(curr_fpscr); |
+ __ tst(curr_fpscr, Operand(kVFPExceptionMask)); |
+ // The following code won't work if vfp exceptions were raised. |
+ // (Overflow is raised for high values, infinity. Invalid exception for NaN.) |
+ __ b(ne, &check_special_cases); |
+ // Perform the subtraction after the branch to preserve the input. |
+ __ vsub(double_input, double_input, double_scratch1); |
+ |
+ // double_input: value brought back to [-2^32, 2^32]. |
+ |
+ // Get the value rounded toward 0. |
+ DwVfpRegister two_31 = double_scratch2; |
+ __ vabs(double_scratch1, double_input); |
+ __ vmov(two_31, two_31_value); |
+ __ vcmp(double_scratch1, two_31); |
+ __ vmrs(pc); |
+ __ b(lt, &in_two_31_range); |
+ |
+ // The value is in the [-2^32, -2^31] U [2^31, 2^32] range. |
+ // Add or subtract 2^31 to easily round it toward zero. |
Karl Klose
2011/03/09 10:37:51
subtrct -> subtract.
Alexandre
2011/03/15 08:45:39
Done.
|
+ // Push negative values below -2^31 to the positive range to let vcvt_u32_f64 |
+ // handle the conversion. (For negative value we add 2^31 to easily round, |
+ // then add 2^31 again instead of subtracting. This works because the |
+ // operation is invariant modulo 2^32.) |
+ __ vcmp(double_input, 0.0); |
+ __ vmrs(pc); |
+ __ vadd(double_input, double_input, two_31, lt); |
+ __ vsub(double_input, double_input, two_31, ge); |
+ __ vcvt_s32_f64(double_input.low(), double_input); |
Karl Klose
2011/03/09 10:37:51
Should this code not use vcvt_u32_f64 as stated in
Alexandre
2011/03/15 08:45:39
No it should not. I updated the comment before to
|
+ __ vcvt_f64_s32(double_input, double_input.low()); |
+ __ vadd(double_input, double_input, two_31); |
+ __ b(&retry); |
+ |
+ __ bind(&in_two_31_range); |
+ // Round the value toward zero and jump back to let the standard |
+ // code handle the conversion. |
+ __ vcvt_s32_f64(double_input.low(), double_input); |
+ __ vcvt_f64_s32(double_input, double_input.low()); |
+ __ b(&retry); |
+ |
+ // We never fall through to here. |
+ // We always jump to 'done' if conversion was successful. |
+ if (FLAG_debug_code) { |
+ __ Abort("We should never fall through."); |
+ } |
+ |
+ // Check for a high exponent, infinity, and NaN, which should all return 0. |
+ // * If the unbiased exponent is at least 52 + 32 = 84 then all mantissa |
+ // bits are shifted out of the 32bit integer range and the result is 0. |
+ // * NaN and Infinity have an exponent of 0x7ff, so the test below will also |
+ // detect them. |
+ |
+ __ bind(&check_special_cases); |
+ |
+ scratch2 = curr_fpscr; |
+ curr_fpscr = no_reg; |
+ |
+ // Get exponent alone in scratch2. |
+ __ vmov(scratch2, double_input.high()); |
+ __ Ubfx(scratch2, |
+ scratch2, |
+ HeapNumber::kExponentShift, |
+ HeapNumber::kExponentBits); |
+ const int32_t big_exp = 84; |
+ __ cmp(scratch2, Operand(HeapNumber::kExponentBias + big_exp)); |
+ __ mov(result, Operand(0)); |
+ __ b(ge, done); |
+ |
+ // We could not handle the truncation manually. |
+ // Restore the FPSCR and fall through. |
+ __ vmsr(prev_fpscr); |
+} |
+ |
+ |
void LCodeGen::DoMathLog(LUnaryMathOperation* instr) { |
ASSERT(ToDoubleRegister(instr->result()).is(d2)); |
TranscendentalCacheStub stub(TranscendentalCache::LOG, |
@@ -3281,20 +3463,33 @@ |
void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) { |
- Label done; |
Register input_reg = ToRegister(instr->InputAt(0)); |
- Register scratch = scratch0(); |
- DoubleRegister dbl_scratch = d0; |
- SwVfpRegister flt_scratch = s0; |
- DoubleRegister dbl_tmp = ToDoubleRegister(instr->TempAt(0)); |
+ Register scratch1 = scratch0(); |
+ Register scratch2 = ToRegister(instr->TempAt(0)); |
+ SwVfpRegister single_scratch = s0; |
+ DwVfpRegister double_scratch1 = d0; |
+ DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1)); |
+ DwVfpRegister double_scratch3 = ToDoubleRegister(instr->TempAt(2)); |
+ ASSERT(!scratch1.is(input_reg)); |
+ ASSERT(!scratch2.is(input_reg)); |
+ ASSERT(!scratch2.is(scratch1)); |
+ ASSERT(!double_scratch1.is(double_scratch2)); |
+ ASSERT(!double_scratch1.is(double_scratch3)); |
+ ASSERT(!double_scratch2.is(double_scratch3)); |
+ |
+ Label done; |
+ |
// Heap number map check. |
- __ ldr(scratch, FieldMemOperand(input_reg, HeapObject::kMapOffset)); |
+ __ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset)); |
__ LoadRoot(ip, Heap::kHeapNumberMapRootIndex); |
- __ cmp(scratch, Operand(ip)); |
+ __ cmp(scratch1, Operand(ip)); |
+ CpuFeatures::Scope scope(VFP3); |
if (instr->truncating()) { |
- Label heap_number; |
+ // Performs a truncating conversion of a floating point number as used by |
+ // the JS bitwise operations. |
+ Label heap_number, success; |
__ b(eq, &heap_number); |
// Check for undefined. Undefined is converted to zero for truncating |
// conversions. |
@@ -3305,36 +3500,42 @@ |
__ b(&done); |
__ bind(&heap_number); |
- __ sub(ip, input_reg, Operand(kHeapObjectTag)); |
- __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); |
- __ vcmp(dbl_tmp, 0.0); // Sets overflow bit in FPSCR flags if NaN. |
- __ vcvt_s32_f64(flt_scratch, dbl_tmp); |
- __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. |
- __ vmrs(pc); // Move vector status bits to normal status bits. |
- // Overflow bit is set if dbl_tmp is Nan. |
- __ cmn(input_reg, Operand(1), vc); // 0x7fffffff + 1 -> overflow. |
- __ cmp(input_reg, Operand(1), vc); // 0x80000000 - 1 -> overflow. |
- DeoptimizeIf(vs, instr->environment()); // Saturation may have occured. |
+ DwVfpRegister double_value = double_scratch3; |
+ double_scratch3 = no_dreg; |
+ __ sub(scratch1, input_reg, Operand(kHeapObjectTag)); |
+ __ vldr(double_value, scratch1, HeapNumber::kValueOffset); |
+ EmitECMATruncate(input_reg, |
+ scratch1, |
+ scratch2, |
+ double_value, |
+ double_scratch1, |
+ double_scratch2, |
+ &success); |
+ DeoptimizeIf(al, instr->environment()); |
+ __ bind(&success); |
+ |
} else { |
// Deoptimize if we don't have a heap number. |
DeoptimizeIf(ne, instr->environment()); |
__ sub(ip, input_reg, Operand(kHeapObjectTag)); |
- __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset); |
- __ vcvt_s32_f64(flt_scratch, dbl_tmp); |
- __ vmov(input_reg, flt_scratch); // 32-bit result of conversion. |
- // Non-truncating conversion means that we cannot lose bits, so we convert |
- // back to check; note that using non-overlapping s and d regs would be |
- // slightly faster. |
- __ vcvt_f64_s32(dbl_scratch, flt_scratch); |
- __ VFPCompareAndSetFlags(dbl_scratch, dbl_tmp); |
- DeoptimizeIf(ne, instr->environment()); // Not equal or unordered. |
+ __ vldr(double_scratch1, ip, HeapNumber::kValueOffset); |
+ __ EmitVFPTruncate(kRoundToZero, |
+ single_scratch, |
+ double_scratch1, |
+ scratch1, |
+ scratch2, |
+ kCheckForInexactConversion); |
+ DeoptimizeIf(ne, instr->environment()); |
+ // Load the result. |
+ __ vmov(input_reg, single_scratch); |
+ |
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { |
- __ tst(input_reg, Operand(input_reg)); |
+ __ cmp(input_reg, Operand(0)); |
__ b(ne, &done); |
- __ vmov(lr, ip, dbl_tmp); |
- __ tst(ip, Operand(1 << 31)); // Test sign bit. |
+ __ vmov(scratch1, double_scratch1.high()); |
+ __ tst(scratch1, Operand(HeapNumber::kSignMask)); |
DeoptimizeIf(ne, instr->environment()); |
} |
} |
@@ -3377,47 +3578,46 @@ |
void LCodeGen::DoDoubleToI(LDoubleToI* instr) { |
LOperand* input = instr->InputAt(0); |
+ LOperand* result = instr->result(); |
ASSERT(input->IsDoubleRegister()); |
- LOperand* result = instr->result(); |
ASSERT(result->IsRegister()); |
- DoubleRegister double_input = ToDoubleRegister(input); |
Register result_reg = ToRegister(result); |
- SwVfpRegister single_scratch = double_scratch0().low(); |
Register scratch1 = scratch0(); |
Register scratch2 = ToRegister(instr->TempAt(0)); |
+ DwVfpRegister double_input = ToDoubleRegister(input); |
+ DwVfpRegister double_scratch1 = double_scratch0(); |
+ DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1)); |
+ SwVfpRegister single_scratch = double_scratch0().low(); |
- __ EmitVFPTruncate(kRoundToZero, |
- single_scratch, |
+ Label done; |
+ |
+ if (instr->truncating()) { |
+ Label success; |
+ EmitECMATruncate(result_reg, |
+ scratch1, |
+ scratch2, |
double_input, |
- scratch1, |
- scratch2); |
- |
- // Deoptimize if we had a vfp invalid exception. |
- DeoptimizeIf(ne, instr->environment()); |
- |
- // Retrieve the result. |
- __ vmov(result_reg, single_scratch); |
- |
- if (!instr->truncating()) { |
- // Convert result back to double and compare with input |
- // to check if the conversion was exact. |
- __ vmov(single_scratch, result_reg); |
- __ vcvt_f64_s32(double_scratch0(), single_scratch); |
- __ VFPCompareAndSetFlags(double_scratch0(), double_input); |
+ double_scratch1, |
+ double_scratch2, |
+ &success); |
+ DeoptimizeIf(al, instr->environment()); |
+ __ bind(&success); |
+ } else { |
+ VFPRoundingMode rounding_mode = kRoundToMinusInf; |
+ __ EmitVFPTruncate(rounding_mode, |
+ single_scratch, |
+ double_input, |
+ scratch1, |
+ scratch2, |
+ kCheckForInexactConversion); |
+ // Deoptimize if we had a vfp invalid exception, |
+ // including inexact operation. |
DeoptimizeIf(ne, instr->environment()); |
- if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { |
- Label done; |
- __ cmp(result_reg, Operand(0)); |
- __ b(ne, &done); |
- // Check for -0. |
- __ vmov(scratch1, double_input.high()); |
- __ tst(scratch1, Operand(HeapNumber::kSignMask)); |
- DeoptimizeIf(ne, instr->environment()); |
- |
- __ bind(&done); |
- } |
+ // Retrieve the result. |
+ __ vmov(result_reg, single_scratch); |
} |
+ __ bind(&done); |
} |