Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(872)

Unified Diff: src/arm/lithium-codegen-arm.cc

Issue 6625084: ARM: Improved double to integer truncation.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/arm/lithium-codegen-arm.cc
===================================================================
--- src/arm/lithium-codegen-arm.cc (revision 7092)
+++ src/arm/lithium-codegen-arm.cc (working copy)
@@ -2716,6 +2716,188 @@
}
+void LCodeGen::TryVcvtTruncation(Register result,
+ DwVfpRegister double_input,
+ Register saved_fpscr,
+ Register current_fpscr,
+ SwVfpRegister single_scratch,
+ TruncationType type,
+ Label* success) {
+ // Attempt one vcvt truncation. Start from saved_fpscr with the cumulative exception flags and the flush-to-zero bit cleared.
+ __ bic(current_fpscr, saved_fpscr, Operand(kVFPExceptionMask |
+ kVFPFlushToZeroMask));
+ __ vmsr(current_fpscr);  // Write the cleared value to the FPSCR.
+ // Try a standard vfp floating-point to integer truncation, using the
+ // default 'round to zero' mode.
+ if (type == kSignedTruncation) {
+ __ vcvt_s32_f64(single_scratch, double_input);
+ } else {
+ __ vcvt_u32_f64(single_scratch, double_input);  // Any other type converts as unsigned.
+ }
+
+ // Retrieve FPSCR and check for vfp exceptions.
+ __ vmrs(current_fpscr);
+ __ tst(current_fpscr, Operand(kVFPExceptionMask));
+ // Load the result; this happens whether or not an exception was raised.
+ __ vmov(result, single_scratch);
+ // Restore the saved FPSCR.
+ __ vmsr(saved_fpscr);
+ // If no vfp exceptions were raised we are done. Otherwise fall through.
+ __ b(eq, success);  // Uses the flags from the tst above; presumably vmov/vmsr leave them intact -- TODO confirm.
+}
+
+
+// The truncation process is:
+// 1: Try to truncate using VFP floating-point to integer vcvt instructions.
+// a: Try to truncate to a signed int.
+// b: If that fails, try to truncate to an unsigned int.
+// 2: If that fails, try to bring back the input value in the 32bit int range.
+// If we succeed jump backward to let vcvt instructions truncate the value.
+// 3: If we could not bring back the value to the int32 range, check for special
+// cases.
+// 4: If that also fails, fall through. The following code should handle the
+// failure, probably by deoptimizing.
+void LCodeGen::EmitECMATruncate(Register result,
Karl Klose 2011/03/09 10:37:51 As fschneider suggested, it would be good to move
Alexandre 2011/03/15 08:45:39 I first moved everything to a stub. Then I refact
+ Register scratch1,
+ Register scratch2,
+ DwVfpRegister double_input,
+ DwVfpRegister double_scratch1,
+ DwVfpRegister double_scratch2,
+ Label* done) {
+ ASSERT(!scratch1.is(result));
+ ASSERT(!scratch2.is(result));
+ ASSERT(!scratch1.is(scratch2));
+ ASSERT(!double_scratch1.is(double_input));
+ ASSERT(!double_scratch2.is(double_input));
+ ASSERT(!double_scratch1.is(double_scratch2));
+
+ Register prev_fpscr = scratch1;
+ Register curr_fpscr = scratch2;
+ scratch1 = no_reg;
+ scratch2 = no_reg;
+
+ SwVfpRegister single_scratch = double_scratch2.low();
+
+ Label retry, check_special_cases;
+
+ // Save the current FPSCR.
+ __ vmrs(prev_fpscr);
+ __ bind(&retry);
+
+ // Try standard vfp floating-point to integer truncations, using the
+ // default 'round to zero' mode.
Søren Thygesen Gjesse 2011/03/08 16:15:03 Drive-by: How fast is the VFP rounding? Maybe just
Karl Klose 2011/03/09 10:37:51 We should measure later, if bit-fiddling code as
Alexandre 2011/03/15 08:45:39 I initially thought that the vfp would be faster.
+ TryVcvtTruncation(result,
+ double_input,
+ prev_fpscr,
+ curr_fpscr,
+ single_scratch,
+ kSignedTruncation,
+ done);
+
+ // Exceptions were raised. Try an unsigned conversion.
+ TryVcvtTruncation(result,
+ double_input,
+ prev_fpscr,
+ curr_fpscr,
+ single_scratch,
+ kUnsignedTruncation,
+ done);
+
+
+ // Standard conversion did not work. Try to handle manually.
+
+ // Clear vfp cumulative exception flags.
+ __ bic(curr_fpscr, curr_fpscr, Operand(kVFPExceptionMask));
+ __ vmsr(curr_fpscr);
+
+ // The truncating conversion is invariant modulo 2^32.
+ // If we are lucky, we can easily bring the input value to the
+ // [-2^32, 2^32] range.
+ Label positive, in_two_31_range;
+ const double two_31_value = 2147483648.0;
Karl Klose 2011/03/09 10:37:51 Constants should be formatted as follows: kTwo31Va
Alexandre 2011/03/15 08:45:39 Done.
+ const double two_32_value = 4294967296.0;
+ // Start bringing the input value to the [-2^32, 2^32] range.
+ DwVfpRegister two_32 = double_scratch2;
+ __ vmov(two_32, two_32_value);
+ __ vdiv(double_scratch1, double_input, two_32);
+ __ vcvt_s32_f64(double_scratch1.low(), double_scratch1);
+ __ vcvt_f64_s32(double_scratch1, double_scratch1.low());
+ __ vmul(double_scratch1, double_scratch1, two_32);
+ // Test for vfp exceptions.
+ __ vmrs(curr_fpscr);
+ __ tst(curr_fpscr, Operand(kVFPExceptionMask));
+ // The following code won't work if vfp exceptions were raised.
+ // (Overflow is raised for high values, infinity. Invalid exception for NaN.)
+ __ b(ne, &check_special_cases);
+ // Perform the subtraction after the branch to preserve the input.
+ __ vsub(double_input, double_input, double_scratch1);
+
+ // double_input: value brought back to [-2^32, 2^32].
+
+ // Get the value rounded toward 0.
+ DwVfpRegister two_31 = double_scratch2;
+ __ vabs(double_scratch1, double_input);
+ __ vmov(two_31, two_31_value);
+ __ vcmp(double_scratch1, two_31);
+ __ vmrs(pc);
+ __ b(lt, &in_two_31_range);
+
+ // The value is in the [-2^32, -2^31] U [2^31, 2^32] range.
+ // Add or subtract 2^31 to easily round it toward zero.
Karl Klose 2011/03/09 10:37:51 subtrct -> subtract.
Alexandre 2011/03/15 08:45:39 Done.
+ // Push negative values below -2^31 to the positive range to let vcvt_u32_f64
+ // handle the conversion. (For negative value we add 2^31 to easily round,
+ // then add 2^31 again instead of subtracting. This works because the
+ // operation is invariant modulo 2^32.)
+ __ vcmp(double_input, 0.0);
+ __ vmrs(pc);
+ __ vadd(double_input, double_input, two_31, lt);
+ __ vsub(double_input, double_input, two_31, ge);
+ __ vcvt_s32_f64(double_input.low(), double_input);
Karl Klose 2011/03/09 10:37:51 Should this code not use vcvt_u32_f64 as stated in
Alexandre 2011/03/15 08:45:39 No it should not. I updated the comment before to
+ __ vcvt_f64_s32(double_input, double_input.low());
+ __ vadd(double_input, double_input, two_31);
+ __ b(&retry);
+
+ __ bind(&in_two_31_range);
+ // Round the value toward zero and jump back to let the standard
+ // code handle the conversion.
+ __ vcvt_s32_f64(double_input.low(), double_input);
+ __ vcvt_f64_s32(double_input, double_input.low());
+ __ b(&retry);
+
+ // We never fall through to here.
+ // We always jump to 'done' if conversion was successful.
+ if (FLAG_debug_code) {
+ __ Abort("We should never fall through.");
+ }
+
+ // Check for a high exponent, infinity, and NaN, which should all return 0.
+ // * If the unbiased exponent is at least 52 + 32 = 84 then all mantissa
+ // bits are shifted out of the 32bit integer range and the result is 0.
+ // * NaN and Infinity have an exponent of 0x7ff, so the test below will also
+ // detect them.
+
+ __ bind(&check_special_cases);
+
+ scratch2 = curr_fpscr;
+ curr_fpscr = no_reg;
+
+ // Get exponent alone in scratch2.
+ __ vmov(scratch2, double_input.high());
+ __ Ubfx(scratch2,
+ scratch2,
+ HeapNumber::kExponentShift,
+ HeapNumber::kExponentBits);
+ const int32_t big_exp = 84;
+ __ cmp(scratch2, Operand(HeapNumber::kExponentBias + big_exp));
+ __ mov(result, Operand(0));
+ __ b(ge, done);
+
+ // We could not handle the truncation manually.
+ // Restore the FPSCR and fall through.
+ __ vmsr(prev_fpscr);
+}
+
+
void LCodeGen::DoMathLog(LUnaryMathOperation* instr) {
ASSERT(ToDoubleRegister(instr->result()).is(d2));
TranscendentalCacheStub stub(TranscendentalCache::LOG,
@@ -3281,20 +3463,33 @@
void LCodeGen::DoDeferredTaggedToI(LTaggedToI* instr) {
- Label done;
Register input_reg = ToRegister(instr->InputAt(0));
- Register scratch = scratch0();
- DoubleRegister dbl_scratch = d0;
- SwVfpRegister flt_scratch = s0;
- DoubleRegister dbl_tmp = ToDoubleRegister(instr->TempAt(0));
+ Register scratch1 = scratch0();
+ Register scratch2 = ToRegister(instr->TempAt(0));
+ SwVfpRegister single_scratch = s0;
+ DwVfpRegister double_scratch1 = d0;
+ DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1));
+ DwVfpRegister double_scratch3 = ToDoubleRegister(instr->TempAt(2));
+ ASSERT(!scratch1.is(input_reg));
+ ASSERT(!scratch2.is(input_reg));
+ ASSERT(!scratch2.is(scratch1));
+ ASSERT(!double_scratch1.is(double_scratch2));
+ ASSERT(!double_scratch1.is(double_scratch3));
+ ASSERT(!double_scratch2.is(double_scratch3));
+
+ Label done;
+
// Heap number map check.
- __ ldr(scratch, FieldMemOperand(input_reg, HeapObject::kMapOffset));
+ __ ldr(scratch1, FieldMemOperand(input_reg, HeapObject::kMapOffset));
__ LoadRoot(ip, Heap::kHeapNumberMapRootIndex);
- __ cmp(scratch, Operand(ip));
+ __ cmp(scratch1, Operand(ip));
+ CpuFeatures::Scope scope(VFP3);
if (instr->truncating()) {
- Label heap_number;
+ // Performs a truncating conversion of a floating point number as used by
+ // the JS bitwise operations.
+ Label heap_number, success;
__ b(eq, &heap_number);
// Check for undefined. Undefined is converted to zero for truncating
// conversions.
@@ -3305,36 +3500,42 @@
__ b(&done);
__ bind(&heap_number);
- __ sub(ip, input_reg, Operand(kHeapObjectTag));
- __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset);
- __ vcmp(dbl_tmp, 0.0); // Sets overflow bit in FPSCR flags if NaN.
- __ vcvt_s32_f64(flt_scratch, dbl_tmp);
- __ vmov(input_reg, flt_scratch); // 32-bit result of conversion.
- __ vmrs(pc); // Move vector status bits to normal status bits.
- // Overflow bit is set if dbl_tmp is Nan.
- __ cmn(input_reg, Operand(1), vc); // 0x7fffffff + 1 -> overflow.
- __ cmp(input_reg, Operand(1), vc); // 0x80000000 - 1 -> overflow.
- DeoptimizeIf(vs, instr->environment()); // Saturation may have occured.
+ DwVfpRegister double_value = double_scratch3;
+ double_scratch3 = no_dreg;
+ __ sub(scratch1, input_reg, Operand(kHeapObjectTag));
+ __ vldr(double_value, scratch1, HeapNumber::kValueOffset);
+ EmitECMATruncate(input_reg,
+ scratch1,
+ scratch2,
+ double_value,
+ double_scratch1,
+ double_scratch2,
+ &success);
+ DeoptimizeIf(al, instr->environment());
+ __ bind(&success);
+
} else {
// Deoptimize if we don't have a heap number.
DeoptimizeIf(ne, instr->environment());
__ sub(ip, input_reg, Operand(kHeapObjectTag));
- __ vldr(dbl_tmp, ip, HeapNumber::kValueOffset);
- __ vcvt_s32_f64(flt_scratch, dbl_tmp);
- __ vmov(input_reg, flt_scratch); // 32-bit result of conversion.
- // Non-truncating conversion means that we cannot lose bits, so we convert
- // back to check; note that using non-overlapping s and d regs would be
- // slightly faster.
- __ vcvt_f64_s32(dbl_scratch, flt_scratch);
- __ VFPCompareAndSetFlags(dbl_scratch, dbl_tmp);
- DeoptimizeIf(ne, instr->environment()); // Not equal or unordered.
+ __ vldr(double_scratch1, ip, HeapNumber::kValueOffset);
+ __ EmitVFPTruncate(kRoundToZero,
+ single_scratch,
+ double_scratch1,
+ scratch1,
+ scratch2,
+ kCheckForInexactConversion);
+ DeoptimizeIf(ne, instr->environment());
+ // Load the result.
+ __ vmov(input_reg, single_scratch);
+
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
- __ tst(input_reg, Operand(input_reg));
+ __ cmp(input_reg, Operand(0));
__ b(ne, &done);
- __ vmov(lr, ip, dbl_tmp);
- __ tst(ip, Operand(1 << 31)); // Test sign bit.
+ __ vmov(scratch1, double_scratch1.high());
+ __ tst(scratch1, Operand(HeapNumber::kSignMask));
DeoptimizeIf(ne, instr->environment());
}
}
@@ -3377,47 +3578,46 @@
void LCodeGen::DoDoubleToI(LDoubleToI* instr) {
LOperand* input = instr->InputAt(0);
+ LOperand* result = instr->result();
ASSERT(input->IsDoubleRegister());
- LOperand* result = instr->result();
ASSERT(result->IsRegister());
- DoubleRegister double_input = ToDoubleRegister(input);
Register result_reg = ToRegister(result);
- SwVfpRegister single_scratch = double_scratch0().low();
Register scratch1 = scratch0();
Register scratch2 = ToRegister(instr->TempAt(0));
+ DwVfpRegister double_input = ToDoubleRegister(input);
+ DwVfpRegister double_scratch1 = double_scratch0();
+ DwVfpRegister double_scratch2 = ToDoubleRegister(instr->TempAt(1));
+ SwVfpRegister single_scratch = double_scratch0().low();
- __ EmitVFPTruncate(kRoundToZero,
- single_scratch,
+ Label done;
+
+ if (instr->truncating()) {
+ Label success;
+ EmitECMATruncate(result_reg,
+ scratch1,
+ scratch2,
double_input,
- scratch1,
- scratch2);
-
- // Deoptimize if we had a vfp invalid exception.
- DeoptimizeIf(ne, instr->environment());
-
- // Retrieve the result.
- __ vmov(result_reg, single_scratch);
-
- if (!instr->truncating()) {
- // Convert result back to double and compare with input
- // to check if the conversion was exact.
- __ vmov(single_scratch, result_reg);
- __ vcvt_f64_s32(double_scratch0(), single_scratch);
- __ VFPCompareAndSetFlags(double_scratch0(), double_input);
+ double_scratch1,
+ double_scratch2,
+ &success);
+ DeoptimizeIf(al, instr->environment());
+ __ bind(&success);
+ } else {
+ VFPRoundingMode rounding_mode = kRoundToMinusInf;
+ __ EmitVFPTruncate(rounding_mode,
+ single_scratch,
+ double_input,
+ scratch1,
+ scratch2,
+ kCheckForInexactConversion);
+ // Deoptimize if we had a vfp invalid exception,
+ // including inexact operation.
DeoptimizeIf(ne, instr->environment());
- if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
- Label done;
- __ cmp(result_reg, Operand(0));
- __ b(ne, &done);
- // Check for -0.
- __ vmov(scratch1, double_input.high());
- __ tst(scratch1, Operand(HeapNumber::kSignMask));
- DeoptimizeIf(ne, instr->environment());
-
- __ bind(&done);
- }
+ // Retrieve the result.
+ __ vmov(result_reg, single_scratch);
}
+ __ bind(&done);
}

Powered by Google App Engine
This is Rietveld 408576698