src/arm/macro-assembler-arm.cc - Issue 12567004: ARM: Optimsisation of ECMA ToInt32.

Unified Diff: src/arm/macro-assembler-arm.cc

Issue 12567004: ARM: Optimsisation of ECMA ToInt32. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Created 7 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/arm/macro-assembler-arm.cc

diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc

index 2265d03bd907532edbf280da92ba3bd9e2daef60..ce9a8d4bfb91b5755b2c1ea708999d5a230f0b46 100644

--- a/src/arm/macro-assembler-arm.cc

+++ b/src/arm/macro-assembler-arm.cc

@@ -2436,105 +2436,6 @@ void MacroAssembler::SmiToDoubleVFPRegister(Register smi,

}

-// Tries to get a signed int32 out of a double precision floating point heap

-// number. Rounds towards 0. Branch to 'not_int32' if the double is out of the

-// 32bits signed integer range.

-void MacroAssembler::ConvertToInt32(Register source,

- Register dest,

- Register scratch,

- Register scratch2,

- DwVfpRegister double_scratch,

- Label *not_int32) {

- if (CpuFeatures::IsSupported(VFP2)) {

- CpuFeatureScope scope(this, VFP2);

- sub(scratch, source, Operand(kHeapObjectTag));

- vldr(double_scratch, scratch, HeapNumber::kValueOffset);

- vcvt_s32_f64(double_scratch.low(), double_scratch);

- vmov(dest, double_scratch.low());

- // Signed vcvt instruction will saturate to the minimum (0x80000000) or

- // maximun (0x7fffffff) signed 32bits integer when the double is out of

- // range. When substracting one, the minimum signed integer becomes the

- // maximun signed integer.

- sub(scratch, dest, Operand(1));

- cmp(scratch, Operand(LONG_MAX - 1));

- // If equal then dest was LONG_MAX, if greater dest was LONG_MIN.

- b(ge, not_int32);

- } else {

- // This code is faster for doubles that are in the ranges -0x7fffffff to

- // -0x40000000 or 0x40000000 to 0x7fffffff. This corresponds almost to

- // the range of signed int32 values that are not Smis. Jumps to the label

- // 'not_int32' if the double isn't in the range -0x80000000.0 to

- // 0x80000000.0 (excluding the endpoints).

- Label right_exponent, done;

- // Get exponent word.

- ldr(scratch, FieldMemOperand(source, HeapNumber::kExponentOffset));

- // Get exponent alone in scratch2.

- Ubfx(scratch2,

- scratch,

- HeapNumber::kExponentShift,

- HeapNumber::kExponentBits);

- // Load dest with zero. We use this either for the final shift or

- // for the answer.

- mov(dest, Operand::Zero());

- // Check whether the exponent matches a 32 bit signed int that is not a Smi.

- // A non-Smi integer is 1.xxx * 2^30 so the exponent is 30 (biased). This is

- // the exponent that we are fastest at and also the highest exponent we can

- // handle here.

- const uint32_t non_smi_exponent = HeapNumber::kExponentBias + 30;

- // The non_smi_exponent, 0x41d, is too big for ARM's immediate field so we

- // split it up to avoid a constant pool entry. You can't do that in general

- // for cmp because of the overflow flag, but we know the exponent is in the

- // range 0-2047 so there is no overflow.

- int fudge_factor = 0x400;

- sub(scratch2, scratch2, Operand(fudge_factor));

- cmp(scratch2, Operand(non_smi_exponent - fudge_factor));

- // If we have a match of the int32-but-not-Smi exponent then skip some

- // logic.

- b(eq, &right_exponent);

- // If the exponent is higher than that then go to slow case. This catches

- // numbers that don't fit in a signed int32, infinities and NaNs.

- b(gt, not_int32);

- // We know the exponent is smaller than 30 (biased). If it is less than

- // 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, i.e.

- // it rounds to zero.

- const uint32_t zero_exponent = HeapNumber::kExponentBias + 0;

- sub(scratch2, scratch2, Operand(zero_exponent - fudge_factor), SetCC);

- // Dest already has a Smi zero.

- b(lt, &done);

- // We have an exponent between 0 and 30 in scratch2. Subtract from 30 to

- // get how much to shift down.

- rsb(dest, scratch2, Operand(30));

- bind(&right_exponent);

- // Get the top bits of the mantissa.

- and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask));

- // Put back the implicit 1.

- orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift));

- // Shift up the mantissa bits to take up the space the exponent used to

- // take. We just orred in the implicit bit so that took care of one and

- // we want to leave the sign bit 0 so we subtract 2 bits from the shift

- // distance.

- const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;

- mov(scratch2, Operand(scratch2, LSL, shift_distance));

- // Put sign in zero flag.

- tst(scratch, Operand(HeapNumber::kSignMask));

- // Get the second half of the double. For some exponents we don't

- // actually need this because the bits get shifted out again, but

- // it's probably slower to test than just to do it.

- ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset));

- // Shift down 22 bits to get the last 10 bits.

- orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance));

- // Move down according to the exponent.

- mov(dest, Operand(scratch, LSR, dest));

- // Fix sign if sign bit was set.

- rsb(dest, dest, Operand::Zero(), LeaveCC, ne);

- bind(&done);

- }

void MacroAssembler::TestDoubleIsInt32(DwVfpRegister double_input,

DwVfpRegister double_scratch) {

ASSERT(!double_input.is(double_scratch));

@@ -2608,85 +2509,32 @@ void MacroAssembler::TryInt32Floor(Register result,

}

-void MacroAssembler::EmitOutOfInt32RangeTruncate(Register result,

- Register input_high,

- Register input_low,

- Register scratch) {

- Label done, normal_exponent, restore_sign;

- // Extract the biased exponent in result.

- Ubfx(result,

- input_high,

- HeapNumber::kExponentShift,

- HeapNumber::kExponentBits);

- // Check for Infinity and NaNs, which should return 0.

- cmp(result, Operand(HeapNumber::kExponentMask));

- mov(result, Operand::Zero(), LeaveCC, eq);

- b(eq, &done);

- // Express exponent as delta to (number of mantissa bits + 31).

- sub(result,

- result,

- Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31),

- SetCC);

- // If the delta is strictly positive, all bits would be shifted away,

- // which means that we can return 0.

- b(le, &normal_exponent);

- mov(result, Operand::Zero());

- b(&done);

- bind(&normal_exponent);

- const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1;

- // Calculate shift.

- add(scratch, result, Operand(kShiftBase + HeapNumber::kMantissaBits), SetCC);

- // Save the sign.

- Register sign = result;

- result = no_reg;

- and_(sign, input_high, Operand(HeapNumber::kSignMask));

- // Set the implicit 1 before the mantissa part in input_high.

- orr(input_high,

- input_high,

- Operand(1 << HeapNumber::kMantissaBitsInTopWord));

- // Shift the mantissa bits to the correct position.

- // We don't need to clear non-mantissa bits as they will be shifted away.

- // If they weren't, it would mean that the answer is in the 32bit range.

- mov(input_high, Operand(input_high, LSL, scratch));

- // Replace the shifted bits with bits from the lower mantissa word.

- Label pos_shift, shift_done;

- rsb(scratch, scratch, Operand(32), SetCC);

- b(&pos_shift, ge);

- // Negate scratch.

- rsb(scratch, scratch, Operand::Zero());

- mov(input_low, Operand(input_low, LSL, scratch));

- b(&shift_done);

- bind(&pos_shift);

- mov(input_low, Operand(input_low, LSR, scratch));

- bind(&shift_done);

- orr(input_high, input_high, Operand(input_low));

- // Restore sign if necessary.

- cmp(sign, Operand::Zero());

- result = sign;

- sign = no_reg;

- rsb(result, input_high, Operand::Zero(), LeaveCC, ne);

- mov(result, input_high, LeaveCC, eq);

- bind(&done);

+void MacroAssembler::ECMAConvertNumberToInt32(Register source,

+ Register result,

+ Register scratch,

+ Register input_high,

+ Register input_low,

+ DwVfpRegister double_scratch1,

+ DwVfpRegister double_scratch2) {

+ if (CpuFeatures::IsSupported(VFP2)) {

+ CpuFeatureScope scope(this, VFP2);

+ vldr(double_scratch1, FieldMemOperand(source, HeapNumber::kValueOffset));

+ ECMAToInt32VFP(result, double_scratch1, double_scratch2,

+ scratch, input_high, input_low);

+ } else {

+ Ldrd(input_low, input_high,

+ FieldMemOperand(source, HeapNumber::kValueOffset));

+ ECMAToInt32NoVFP(result, scratch, input_high, input_low);

+ }

}

-void MacroAssembler::EmitECMATruncate(Register result,

- DwVfpRegister double_input,

- DwVfpRegister double_scratch,

- Register scratch,

- Register input_high,

- Register input_low) {

+void MacroAssembler::ECMAToInt32VFP(Register result,

+ DwVfpRegister double_input,

+ DwVfpRegister double_scratch,

+ Register scratch,

+ Register input_high,

+ Register input_low) {

CpuFeatureScope scope(this, VFP2);

ASSERT(!input_high.is(result));

ASSERT(!input_low.is(result));

@@ -2696,38 +2544,136 @@ void MacroAssembler::EmitECMATruncate(Register result,

!scratch.is(input_low));

ASSERT(!double_input.is(double_scratch));

- Label done;

+ Label overflow, out_of_range, negate, done;

- // Test if the value can be exactly represented as a signed integer.

- TryDoubleToInt32Exact(result, double_input, double_scratch);

- b(eq, &done);

+ vmov(input_low, input_high, double_input);

+ Ubfx(scratch, input_high,

+ HeapNumber::kExponentShift, HeapNumber::kExponentBits);

+ // Load scratch with exponent - 1. This is faster than loading

+ // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.

+ sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));

+ // Compare exponent with 31 (compare exponent - 1 with 30).

+ cmp(scratch, Operand(30));

+ b(ge, &overflow);

+ // Exponent is less than 31 so vcvt will never saturate.

+ // So, just return the result.

+ vcvt_s32_f64(double_scratch.low(), double_input);

+ vmov(result, double_scratch.low());

+ b(&done);

- // Check the exception flags. If they are not set, we are done.

- // If they are set, it could be because of the conversion above, or because

- // they were set before this code.

- vmrs(scratch);

- tst(scratch, Operand(kVFPInvalidOpExceptionBit));

- b(eq, &done);

+ bind(&overflow);

+ // If exponent is greater than or equal to 84, the 32 less significant

+ // bits are 0s (2^84 = 1, 52 significant bits, 32 uncoded bits),

+ // the result is 0.

+ // This test also catch Nan and infinities which also return 0.

+ // Compare exponent with 84 (compare exponent - 1 with 83).

+ cmp(scratch, Operand(83));

+ b(ge, &out_of_range);

- // Clear cumulative exception flags.

- bic(scratch, scratch, Operand(kVFPInvalidOpExceptionBit));

- vmsr(scratch);

- // Try a conversion to a signed integer.

- vcvt_s32_f64(double_scratch.low(), double_input);

- // Retrieve the FPSCR.

- vmrs(scratch);

- // Check for invalid conversions (out of range and NaNs).

- tst(scratch, Operand(kVFPInvalidOpExceptionBit));

- // If we had no exceptions we are done.

- b(eq, &done);

+ // If we reach this code, 31 <= exponent <= 83.

+ // So, we don't have to handle cases where 0 <= exponent <= 20 for

+ // which we would need to shift right the high part of the mantissa.

+ ECMAToInt32Tail(result, scratch, input_high, input_low,

+ &out_of_range, &negate, &done);

- // Load the double value and perform a manual truncation.

- vmov(input_low, input_high, double_input);

- EmitOutOfInt32RangeTruncate(result,

- input_high,

- input_low,

- scratch);

- bind(&done);

+void MacroAssembler::ECMAToInt32NoVFP(Register result,

+ Register scratch,

+ Register input_high,

+ Register input_low) {

+ ASSERT(!result.is(scratch));

+ ASSERT(!result.is(input_high));

+ ASSERT(!result.is(input_low));

+ ASSERT(!scratch.is(input_high));

+ ASSERT(!scratch.is(input_low));

+ ASSERT(!input_high.is(input_low));

+ Label both, out_of_range, negate, done;

+ Ubfx(scratch, input_high,

+ HeapNumber::kExponentShift, HeapNumber::kExponentBits);

+ // Load scratch with exponent - 1. This is faster than loading

+ // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.

+ sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));

+ // If exponent is negative, 0 < input < 1, the result is 0.

+ // If exponent is greater than or equal to 84, the 32 less significant

+ // bits are 0s (2^84 = 1, 52 significant bits, 32 uncoded bits),

+ // the result is 0.

+ // This test also catch Nan and infinities which also return 0.

+ // Compare exponent with 84 (compare exponent - 1 with 83).

+ cmp(scratch, Operand(83));

+ // We do an unsigned comparison so negative numbers are treated as big

+ // positive number and the two tests above are done in one test.

+ b(hs, &out_of_range);

+ // Load scratch with 20 - exponent (load with 19 - (exponent - 1)).

+ rsb(scratch, scratch, Operand(19), SetCC);

+ b(mi, &both);

+ // 0 <= exponent <= 20, shift only input_high.

+ // Scratch contains: 20 - exponent.

+ Ubfx(result, input_high,

+ 0, HeapNumber::kMantissaBitsInTopWord);

+ orr(result, result, Operand(1 << HeapNumber::kMantissaBitsInTopWord));

+ mov(result, Operand(result, LSR, scratch));

+ b(&negate);

+ bind(&both);

+ // Restore scratch to exponent - 1 to be consistent with ECMAToInt32VFP.

+ rsb(scratch, scratch, Operand(19));

+ ECMAToInt32Tail(result, scratch, input_high, input_low,

+ &out_of_range, &negate, &done);

+void MacroAssembler::ECMAToInt32Tail(Register result,

+ Register scratch,

+ Register input_high,

+ Register input_low,

+ Label* out_of_range,

+ Label* negate,

+ Label* done) {

+ Label only_low;

+ // On entry, scratch contains exponent - 1.

+ // Load scratch with 52 - exponent (load with 51 - (exponent - 1)).

+ rsb(scratch, scratch, Operand(51), SetCC);

+ b(ls, &only_low);

+ // 21 <= exponent <= 51, shift input_low and input_high

+ // to generate the result.

+ mov(input_low, Operand(input_low, LSR, scratch));

+ // Scratch contains: 52 - exponent.

+ // We needs: exponent - 20.

+ // So we use: 32 - scratch = 32 - 52 + exponent = exponent - 20.

+ rsb(scratch, scratch, Operand(32));

+ Ubfx(result, input_high,

+ 0, HeapNumber::kMantissaBitsInTopWord);

+ orr(result, result, Operand(1 << HeapNumber::kMantissaBitsInTopWord));

ulan 2013/03/11 15:11:03 Maybe restore the comment about implicit 1? // Set

Rodolph Perfetta 2013/03/12 11:24:45 Done.

+ orr(result, input_low, Operand(result, LSL, scratch));

+ b(negate);

+ bind(out_of_range);

+ mov(result, Operand::Zero());

+ b(done);

+ bind(&only_low);

+ // 52 <= exponent <= 83, shift only input_low.

+ // On entry, scratch contains: 52 - exponent.

+ rsb(scratch, scratch, Operand::Zero());

+ mov(result, Operand(input_low, LSL, scratch));

+ bind(negate);

+ // If input was positive, input_high ASR 31 equals 0 and

+ // input_high LSR 31 equals zero.

+ // New result = (result eor 0) + 0 = result.

+ // If the input was negative, we have to negate the result.

+ // Input_high ASR 31 equals 0xffffffff and input_high LSR 31 equals 1.

+ // New result = (result eor 0xffffffff) + 1 = 0 - result.

+ eor(result, result, Operand(input_high, ASR, 31));

ulan 2013/03/11 15:11:03 This is an awesome trick :)

+ add(result, result, Operand(input_high, LSR, 31));

+ bind(done);

}

« src/arm/macro-assembler-arm.h ('K') | « src/arm/macro-assembler-arm.h ('k') | src/arm/stub-cache-arm.cc » ('j') | no next file with comments »