Index: src/arm/codegen-arm.cc |
=================================================================== |
--- src/arm/codegen-arm.cc (revision 4950) |
+++ src/arm/codegen-arm.cc (working copy) |
@@ -6258,6 +6258,86 @@ |
#define __ ACCESS_MASM(masm) |
+// This uses versions of the sum-of-digits-to-see-if-a-number-is-divisible-by-3 |
+// trick. See http://en.wikipedia.org/wiki/Divisibility_rule |
+// Takes the sum of the digits base (mask + 1) repeatedly until we have a |
+// number from 0 to mask. On exit the 'eq' condition flags are set if the |
+// answer is exactly the mask. |
+void DigitSum(MacroAssembler* masm, |
Søren Thygesen Gjesse
2010/06/28 07:19:36
static (times 5)? And why are these functions not
|
+ Register lhs, |
+ int mask, |
+ int shift) { |
+ ASSERT(mask > 0); |
+ ASSERT(mask <= 0xff); // This ensures we don't need ip to use it. |
+ Label loop, entry; |
+ __ jmp(&entry); |
+ __ bind(&loop); |
+ __ and_(ip, lhs, Operand(mask)); |
+ __ add(lhs, ip, Operand(lhs, LSR, shift)); |
+ __ bind(&entry); |
+ __ cmp(lhs, Operand(mask)); |
+ __ b(gt, &loop); |
+} |
+ |
+ |
+void DigitSum(MacroAssembler* masm, |
+ Register lhs, |
+ Register scratch, |
+ int mask, |
+ int shift1, |
+ int shift2) { |
+ ASSERT(mask > 0); |
+ ASSERT(mask <= 0xff); // This ensures we don't need ip to use it. |
+ Label loop, entry; |
+ __ jmp(&entry); |
+ __ bind(&loop); |
+ __ bic(scratch, lhs, Operand(mask)); |
+ __ and_(ip, lhs, Operand(mask)); |
+ __ add(lhs, ip, Operand(lhs, LSR, shift1)); |
+ __ add(lhs, lhs, Operand(scratch, LSR, shift2)); |
+ __ bind(&entry); |
+ __ cmp(lhs, Operand(mask)); |
+ __ b(gt, &loop); |
+} |
+ |
+ |
+// Splits the number into two halves (bottom half has shift bits). The top |
+// half is subtracted from the bottom half. If the result is negative then |
+// rhs is added. |
+void ModGetInRangeBySubtraction(MacroAssembler* masm, |
+ Register lhs, |
+ int shift, |
+ int rhs) { |
+ int mask = (1 << shift) - 1; |
+ __ and_(ip, lhs, Operand(mask)); |
+ __ sub(lhs, ip, Operand(lhs, LSR, shift), SetCC); |
+ __ add(lhs, lhs, Operand(rhs), LeaveCC, mi); |
+} |
+ |
+ |
+void ModReduce(MacroAssembler* masm, |
+ Register lhs, |
+ int max, |
+ int denominator) { |
+ int limit = denominator; |
+ while (limit * 2 <= max) limit *= 2; |
+ while (limit >= denominator) { |
+ __ cmp(lhs, Operand(limit)); |
+ __ sub(lhs, lhs, Operand(limit), LeaveCC, ge); |
+ limit >>= 1; |
+ } |
+} |
+ |
+ |
+void ModAnswer(MacroAssembler* masm, |
+ Register result, |
+ Register shift_distance, |
+ Register mask_bits, |
+ Register sum_of_digits) { |
+ __ add(result, mask_bits, Operand(sum_of_digits, LSL, shift_distance)); |
+ __ Ret(); |
+} |
+ |
Handle<String> Reference::GetName() { |
ASSERT(type_ == NAMED); |
Property* property = expression_->AsProperty(); |
@@ -6621,7 +6701,7 @@ |
__ bind(¬_special); |
// Count leading zeros. Uses mantissa for a scratch register on pre-ARM5. |
// Gets the wrong answer for 0, but we already checked for that case above. |
- __ CountLeadingZeros(source_, mantissa, zeros_); |
+ __ CountLeadingZeros(zeros_, source_, mantissa); |
// Compute exponent and or it into the exponent register. |
// We use mantissa as a scratch register here. Use a fudge factor to |
// divide the constant 31 + HeapNumber::kExponentBias, 0x41d, into two parts |
@@ -7350,7 +7430,7 @@ |
// If we have floating point hardware, inline ADD, SUB, MUL, and DIV, |
// using registers d7 and d6 for the double values. |
- if (use_fp_registers) { |
+ if (CpuFeatures::IsSupported(VFP3)) { |
CpuFeatures::Scope scope(VFP3); |
__ mov(r7, Operand(rhs, ASR, kSmiTagSize)); |
__ vmov(s15, r7); |
@@ -7358,8 +7438,12 @@ |
__ mov(r7, Operand(lhs, ASR, kSmiTagSize)); |
__ vmov(s13, r7); |
__ vcvt_f64_s32(d6, s13); |
+ if (!use_fp_registers) { |
+ __ vmov(r2, r3, d7); |
+ __ vmov(r0, r1, d6); |
+ } |
} else { |
- // Write Smi from rhs to r3 and r2 in double format. r3 is scratch. |
+ // Write Smi from rhs to r3 and r2 in double format. r9 is scratch. |
__ mov(r7, Operand(rhs)); |
ConvertToDoubleStub stub1(r3, r2, r7, r9); |
__ push(lr); |
@@ -7434,12 +7518,15 @@ |
__ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow); |
} |
- if (use_fp_registers) { |
+ if (CpuFeatures::IsSupported(VFP3)) { |
CpuFeatures::Scope scope(VFP3); |
// Convert smi in r0 to double in d7. |
__ mov(r7, Operand(r0, ASR, kSmiTagSize)); |
__ vmov(s15, r7); |
__ vcvt_f64_s32(d7, s15); |
+ if (!use_fp_registers) { |
+ __ vmov(r2, r3, d7); |
+ } |
} else { |
// Write Smi from r0 to r3 and r2 in double format. |
__ mov(r7, Operand(r0)); |
@@ -7490,12 +7577,15 @@ |
__ AllocateHeapNumber(r5, r4, r7, heap_number_map, &slow); |
} |
- if (use_fp_registers) { |
+ if (CpuFeatures::IsSupported(VFP3)) { |
CpuFeatures::Scope scope(VFP3); |
// Convert smi in r1 to double in d6. |
__ mov(r7, Operand(r1, ASR, kSmiTagSize)); |
__ vmov(s13, r7); |
__ vcvt_f64_s32(d6, s13); |
+ if (!use_fp_registers) { |
+ __ vmov(r0, r1, d6); |
+ } |
} else { |
// Write Smi from r1 to r1 and r0 in double format. |
__ mov(r7, Operand(r1)); |
@@ -7942,6 +8032,98 @@ |
} |
+// See comment for class. |
+void IntegerModStub::Generate(MacroAssembler* masm) { |
+ __ mov(lhs_, Operand(lhs_, LSR, shift_distance_)); |
+ __ bic(odd_number_, odd_number_, Operand(1)); |
+ __ mov(odd_number_, Operand(odd_number_, LSL, 1)); |
+ // We now have (odd_number_ - 1) * 2 in the register. |
+ // Build a switch out of branches instead of data because it avoids |
+ // having to teach the assembler about intra-code-object pointers |
+ // that are not in relative branch instructions. |
+ Label mod3, mod5, mod7, mod9, mod11, mod13, mod15, mod17, mod19; |
+ Label mod21, mod23, mod25; |
+ { Assembler::BlockConstPoolScope block_const_pool(masm); |
+ __ add(pc, pc, Operand(odd_number_)); |
+ // When you read pc it is always 8 ahead, but when you write it you always |
+ // write the actual value. So we put in two nops to take up the slack. |
+ __ nop(); |
+ __ nop(); |
+ __ b(&mod3); |
+ __ b(&mod5); |
+ __ b(&mod7); |
+ __ b(&mod9); |
+ __ b(&mod11); |
+ __ b(&mod13); |
+ __ b(&mod15); |
+ __ b(&mod17); |
+ __ b(&mod19); |
+ __ b(&mod21); |
+ __ b(&mod23); |
+ __ b(&mod25); |
+ } |
+ __ bind(&mod3); |
+ DigitSum(masm, lhs_, 3, 2); |
+ __ sub(lhs_, lhs_, Operand(3), LeaveCC, eq); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod5); |
+ DigitSum(masm, lhs_, 0xf, 4); |
+ ModGetInRangeBySubtraction(masm, lhs_, 2, 5); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod7); |
+ DigitSum(masm, lhs_, 7, 3); |
+ __ sub(lhs_, lhs_, Operand(7), LeaveCC, eq); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod9); |
+ DigitSum(masm, lhs_, 0x3f, 6); |
+ ModGetInRangeBySubtraction(masm, lhs_, 3, 9); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod11); |
+ DigitSum(masm, lhs_, r5, 0x3f, 6, 3); |
+ ModReduce(masm, lhs_, 0x3f, 11); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod13); |
+ DigitSum(masm, lhs_, r5, 0xff, 8, 5); |
+ ModReduce(masm, lhs_, 0xff, 13); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod15); |
+ DigitSum(masm, lhs_, 0xf, 4); |
+ __ sub(lhs_, lhs_, Operand(15), LeaveCC, eq); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod17); |
+ DigitSum(masm, lhs_, 0xff, 8); |
+ ModGetInRangeBySubtraction(masm, lhs_, 4, 17); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod19); |
+ DigitSum(masm, lhs_, r5, 0xff, 8, 5); |
+ ModReduce(masm, lhs_, 0xff, 19); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod21); |
+ DigitSum(masm, lhs_, 0x3f, 6); |
+ ModReduce(masm, lhs_, 0x3f, 21); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod23); |
+ DigitSum(masm, lhs_, r5, 0xff, 8, 7); |
+ ModReduce(masm, lhs_, 0xff, 23); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+ |
+ __ bind(&mod25); |
+ DigitSum(masm, lhs_, r5, 0x7f, 7, 6); |
+ ModReduce(masm, lhs_, 0x7f, 25); |
+ ModAnswer(masm, result_, shift_distance_, mask_bits_, lhs_); |
+} |
+ |
+ |
const char* GenericBinaryOpStub::GetName() { |
if (name_ != NULL) return name_; |
const int len = 100; |
@@ -8069,7 +8251,7 @@ |
case Token::MOD: { |
Label not_smi; |
if (ShouldGenerateSmiCode() && specialized_on_rhs_) { |
- Label smi_is_unsuitable; |
+ Label lhs_is_unsuitable; |
__ BranchOnNotSmi(lhs, ¬_smi); |
if (IsPowerOf2(constant_rhs_)) { |
if (op_ == Token::MOD) { |
@@ -8090,14 +8272,14 @@ |
__ eor(rhs, rhs, Operand(0x80000000u), SetCC); |
// Next two instructions are conditional on the answer being -0. |
__ mov(rhs, Operand(Smi::FromInt(constant_rhs_)), LeaveCC, eq); |
- __ b(eq, &smi_is_unsuitable); |
+ __ b(eq, &lhs_is_unsuitable); |
// We need to subtract the dividend. Eg. -3 % 4 == -3. |
__ sub(result, rhs, Operand(Smi::FromInt(constant_rhs_))); |
} else { |
ASSERT(op_ == Token::DIV); |
__ tst(lhs, |
Operand(0x80000000u | ((constant_rhs_ << kSmiTagSize) - 1))); |
- __ b(ne, &smi_is_unsuitable); // Go slow on negative or remainder. |
+ __ b(ne, &lhs_is_unsuitable); // Go slow on negative or remainder. |
int shift = 0; |
int d = constant_rhs_; |
while ((d & 1) == 0) { |
@@ -8110,7 +8292,7 @@ |
} else { |
// Not a power of 2. |
__ tst(lhs, Operand(0x80000000u)); |
- __ b(ne, &smi_is_unsuitable); |
+ __ b(ne, &lhs_is_unsuitable); |
// Find a fixed point reciprocal of the divisor so we can divide by |
// multiplying. |
double divisor = 1.0 / constant_rhs_; |
@@ -8145,7 +8327,7 @@ |
// (lhs / rhs) where / indicates integer division. |
if (op_ == Token::DIV) { |
__ cmp(lhs, Operand(scratch, LSL, required_scratch_shift)); |
- __ b(ne, &smi_is_unsuitable); // There was a remainder. |
+ __ b(ne, &lhs_is_unsuitable); // There was a remainder. |
__ mov(result, Operand(scratch2, LSL, kSmiTagSize)); |
} else { |
ASSERT(op_ == Token::MOD); |
@@ -8153,14 +8335,21 @@ |
} |
} |
__ Ret(); |
- __ bind(&smi_is_unsuitable); |
+ __ bind(&lhs_is_unsuitable); |
} else if (op_ == Token::MOD && |
runtime_operands_type_ != BinaryOpIC::HEAP_NUMBERS && |
runtime_operands_type_ != BinaryOpIC::STRINGS) { |
// Do generate a bit of smi code for modulus even though the default for |
// modulus is not to do it, but as the ARM processor has no coprocessor |
- // support for modulus checking for smis makes sense. |
+ // support for modulus checking for smis makes sense. We can handle |
+ // 1 to 25 times any power of 2. This covers over half the numbers from |
+ // 1 to 100 including all of the first 25. (Actually the constants < 10 |
+ // are handled above by reciprocal multiplication. We only get here for |
+ // those cases if the right hand side is not a constant or for cases |
+ // like 192 which is 3*2^6 and ends up in the 3 case in the integer mod |
+ // stub.) |
Label slow; |
+ Label not_power_of_2; |
ASSERT(!ShouldGenerateSmiCode()); |
ASSERT(kSmiTag == 0); // Adjust code below. |
// Check for two positive smis. |
@@ -8168,13 +8357,42 @@ |
__ tst(smi_test_reg, Operand(0x80000000u | kSmiTagMask)); |
__ b(ne, &slow); |
// Check that rhs is a power of two and not zero. |
+ Register mask_bits = r3; |
__ sub(scratch, rhs, Operand(1), SetCC); |
__ b(mi, &slow); |
- __ tst(rhs, scratch); |
- __ b(ne, &slow); |
+ __ and_(mask_bits, rhs, Operand(scratch), SetCC); |
+ __ b(ne, ¬_power_of_2); |
// Calculate power of two modulus. |
__ and_(result, lhs, Operand(scratch)); |
__ Ret(); |
+ |
+ __ bind(¬_power_of_2); |
+ __ eor(scratch, scratch, Operand(mask_bits)); |
+ // At least two bits are set in the modulus. The high one(s) are in |
+ // mask_bits and the low one is scratch + 1. |
+ __ and_(mask_bits, scratch, Operand(lhs)); |
+ Register shift_distance = scratch; |
+ scratch = no_reg; |
+ |
+ // The rhs consists of a power of 2 multiplied by some odd number. |
+ // The power-of-2 part we handle by putting the corresponding bits |
+ // from the lhs in the mask_bits register, and the power in the |
+ // shift_distance register. Shift distance is never 0 due to Smi |
+ // tagging. |
+ __ CountLeadingZeros(r4, shift_distance, shift_distance); |
+ __ rsb(shift_distance, r4, Operand(32)); |
+ |
+ // Now we need to find out what the odd number is. The last bit is |
+ // always 1. |
+ Register odd_number = r4; |
+ __ mov(odd_number, Operand(rhs, LSR, shift_distance)); |
+ __ cmp(odd_number, Operand(25)); |
+ __ b(gt, &slow); |
+ |
+ IntegerModStub stub( |
+ result, shift_distance, odd_number, mask_bits, lhs, r5); |
+ __ Jump(stub.GetCode(), RelocInfo::CODE_TARGET); // Tail call. |
+ |
__ bind(&slow); |
} |
HandleBinaryOpSlowCases( |