Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 12567004: ARM: Optimsisation of ECMA ToInt32. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2418 matching lines...) Expand 10 before | Expand all | Expand 10 after
2429 void MacroAssembler::SmiToDoubleVFPRegister(Register smi, 2429 void MacroAssembler::SmiToDoubleVFPRegister(Register smi,
2430 DwVfpRegister value, 2430 DwVfpRegister value,
2431 Register scratch1, 2431 Register scratch1,
2432 SwVfpRegister scratch2) { 2432 SwVfpRegister scratch2) {
2433 mov(scratch1, Operand(smi, ASR, kSmiTagSize)); 2433 mov(scratch1, Operand(smi, ASR, kSmiTagSize));
2434 vmov(scratch2, scratch1); 2434 vmov(scratch2, scratch1);
2435 vcvt_f64_s32(value, scratch2); 2435 vcvt_f64_s32(value, scratch2);
2436 } 2436 }
2437 2437
2438 2438
2439 // Tries to get a signed int32 out of a double precision floating point heap
2440 // number. Rounds towards 0. Branches to 'not_int32' if the double is out of
2441 // the 32-bit signed integer range.
2442 void MacroAssembler::ConvertToInt32(Register source,
2443 Register dest,
2444 Register scratch,
2445 Register scratch2,
2446 DwVfpRegister double_scratch,
2447 Label *not_int32) {
2448 if (CpuFeatures::IsSupported(VFP2)) {
2449 CpuFeatureScope scope(this, VFP2);
2450 sub(scratch, source, Operand(kHeapObjectTag));
2451 vldr(double_scratch, scratch, HeapNumber::kValueOffset);
2452 vcvt_s32_f64(double_scratch.low(), double_scratch);
2453 vmov(dest, double_scratch.low());
2454 // Signed vcvt instruction will saturate to the minimum (0x80000000) or
2455 // maximum (0x7fffffff) signed 32-bit integer when the double is out of
2456 // range. When subtracting one, the minimum signed integer becomes the
2457 // maximum signed integer.
2458 sub(scratch, dest, Operand(1));
2459 cmp(scratch, Operand(LONG_MAX - 1));
2460 // If equal then dest was LONG_MAX, if greater dest was LONG_MIN.
2461 b(ge, not_int32);
2462 } else {
2463 // This code is faster for doubles that are in the ranges -0x7fffffff to
2464 // -0x40000000 or 0x40000000 to 0x7fffffff. This corresponds almost to
2465 // the range of signed int32 values that are not Smis. Jumps to the label
2466 // 'not_int32' if the double isn't in the range -0x80000000.0 to
2467 // 0x80000000.0 (excluding the endpoints).
2468 Label right_exponent, done;
2469 // Get exponent word.
2470 ldr(scratch, FieldMemOperand(source, HeapNumber::kExponentOffset));
2471 // Get exponent alone in scratch2.
2472 Ubfx(scratch2,
2473 scratch,
2474 HeapNumber::kExponentShift,
2475 HeapNumber::kExponentBits);
2476 // Load dest with zero. We use this either for the final shift or
2477 // for the answer.
2478 mov(dest, Operand::Zero());
2479 // Check whether the exponent matches a 32 bit signed int that is not a Smi.
2480 // A non-Smi integer is 1.xxx * 2^30 so the exponent is 30 (biased). This is
2481 // the exponent that we are fastest at and also the highest exponent we can
2482 // handle here.
2483 const uint32_t non_smi_exponent = HeapNumber::kExponentBias + 30;
2484 // The non_smi_exponent, 0x41d, is too big for ARM's immediate field so we
2485 // split it up to avoid a constant pool entry. You can't do that in general
2486 // for cmp because of the overflow flag, but we know the exponent is in the
2487 // range 0-2047 so there is no overflow.
2488 int fudge_factor = 0x400;
2489 sub(scratch2, scratch2, Operand(fudge_factor));
2490 cmp(scratch2, Operand(non_smi_exponent - fudge_factor));
2491 // If we have a match of the int32-but-not-Smi exponent then skip some
2492 // logic.
2493 b(eq, &right_exponent);
2494 // If the exponent is higher than that then go to slow case. This catches
2495 // numbers that don't fit in a signed int32, infinities and NaNs.
2496 b(gt, not_int32);
2497
2498 // We know the exponent is smaller than 30 (biased). If it is less than
2499 // 0 (biased) then the number is smaller in magnitude than 1.0 * 2^0, i.e.
2500 // it rounds to zero.
2501 const uint32_t zero_exponent = HeapNumber::kExponentBias + 0;
2502 sub(scratch2, scratch2, Operand(zero_exponent - fudge_factor), SetCC);
2503 // Dest already has a Smi zero.
2504 b(lt, &done);
2505
2506 // We have an exponent between 0 and 30 in scratch2. Subtract from 30 to
2507 // get how much to shift down.
2508 rsb(dest, scratch2, Operand(30));
2509
2510 bind(&right_exponent);
2511 // Get the top bits of the mantissa.
2512 and_(scratch2, scratch, Operand(HeapNumber::kMantissaMask));
2513 // Put back the implicit 1.
2514 orr(scratch2, scratch2, Operand(1 << HeapNumber::kExponentShift));
2515 // Shift up the mantissa bits to take up the space the exponent used to
2516 // take. We just orred in the implicit bit so that took care of one and
2517 // we want to leave the sign bit 0 so we subtract 2 bits from the shift
2518 // distance.
2519 const int shift_distance = HeapNumber::kNonMantissaBitsInTopWord - 2;
2520 mov(scratch2, Operand(scratch2, LSL, shift_distance));
2521 // Put sign in zero flag.
2522 tst(scratch, Operand(HeapNumber::kSignMask));
2523 // Get the second half of the double. For some exponents we don't
2524 // actually need this because the bits get shifted out again, but
2525 // it's probably slower to test than just to do it.
2526 ldr(scratch, FieldMemOperand(source, HeapNumber::kMantissaOffset));
2527 // Shift down 22 bits to get the last 10 bits.
2528 orr(scratch, scratch2, Operand(scratch, LSR, 32 - shift_distance));
2529 // Move down according to the exponent.
2530 mov(dest, Operand(scratch, LSR, dest));
2531 // Fix sign if sign bit was set.
2532 rsb(dest, dest, Operand::Zero(), LeaveCC, ne);
2533 bind(&done);
2534 }
2535 }
2536
2537
2538 void MacroAssembler::TestDoubleIsInt32(DwVfpRegister double_input, 2439 void MacroAssembler::TestDoubleIsInt32(DwVfpRegister double_input,
2539 DwVfpRegister double_scratch) { 2440 DwVfpRegister double_scratch) {
2540 ASSERT(!double_input.is(double_scratch)); 2441 ASSERT(!double_input.is(double_scratch));
2541 ASSERT(CpuFeatures::IsSupported(VFP2)); 2442 ASSERT(CpuFeatures::IsSupported(VFP2));
2542 CpuFeatureScope scope(this, VFP2); 2443 CpuFeatureScope scope(this, VFP2);
2543 2444
2544 vcvt_s32_f64(double_scratch.low(), double_input); 2445 vcvt_s32_f64(double_scratch.low(), double_input);
2545 vcvt_f64_s32(double_scratch, double_scratch.low()); 2446 vcvt_f64_s32(double_scratch, double_scratch.low());
2546 VFPCompareAndSetFlags(double_input, double_scratch); 2447 VFPCompareAndSetFlags(double_input, double_scratch);
2547 } 2448 }
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
2601 // floor(x) <=> round_to_zero(x) - 1. 2502 // floor(x) <=> round_to_zero(x) - 1.
2602 bind(&negative); 2503 bind(&negative);
2603 sub(result, result, Operand(1), SetCC); 2504 sub(result, result, Operand(1), SetCC);
2604 // If result is still negative, go to done, result fetched. 2505 // If result is still negative, go to done, result fetched.
2605 // Else, we had an overflow and we fall through exception. 2506 // Else, we had an overflow and we fall through exception.
2606 b(mi, done); 2507 b(mi, done);
2607 bind(&exception); 2508 bind(&exception);
2608 } 2509 }
2609 2510
2610 2511
2611 void MacroAssembler::EmitOutOfInt32RangeTruncate(Register result, 2512 void MacroAssembler::ECMAConvertNumberToInt32(Register source,
2612 Register input_high, 2513 Register result,
2613 Register input_low, 2514 Register scratch,
2614 Register scratch) { 2515 Register input_high,
2615 Label done, normal_exponent, restore_sign; 2516 Register input_low,
2616 2517 DwVfpRegister double_scratch1,
2617 // Extract the biased exponent in result. 2518 DwVfpRegister double_scratch2) {
2618 Ubfx(result, 2519 if (CpuFeatures::IsSupported(VFP2)) {
2619 input_high, 2520 CpuFeatureScope scope(this, VFP2);
2620 HeapNumber::kExponentShift, 2521 vldr(double_scratch1, FieldMemOperand(source, HeapNumber::kValueOffset));
2621 HeapNumber::kExponentBits); 2522 ECMAToInt32VFP(result, double_scratch1, double_scratch2,
2622 2523 scratch, input_high, input_low);
2623 // Check for Infinity and NaNs, which should return 0. 2524 } else {
2624 cmp(result, Operand(HeapNumber::kExponentMask)); 2525 Ldrd(input_low, input_high,
2625 mov(result, Operand::Zero(), LeaveCC, eq); 2526 FieldMemOperand(source, HeapNumber::kValueOffset));
2626 b(eq, &done); 2527 ECMAToInt32NoVFP(result, scratch, input_high, input_low);
2627 2528 }
2628 // Express exponent as delta to (number of mantissa bits + 31).
2629 sub(result,
2630 result,
2631 Operand(HeapNumber::kExponentBias + HeapNumber::kMantissaBits + 31),
2632 SetCC);
2633
2634 // If the delta is strictly positive, all bits would be shifted away,
2635 // which means that we can return 0.
2636 b(le, &normal_exponent);
2637 mov(result, Operand::Zero());
2638 b(&done);
2639
2640 bind(&normal_exponent);
2641 const int kShiftBase = HeapNumber::kNonMantissaBitsInTopWord - 1;
2642 // Calculate shift.
2643 add(scratch, result, Operand(kShiftBase + HeapNumber::kMantissaBits), SetCC);
2644
2645 // Save the sign.
2646 Register sign = result;
2647 result = no_reg;
2648 and_(sign, input_high, Operand(HeapNumber::kSignMask));
2649
2650 // Set the implicit 1 before the mantissa part in input_high.
2651 orr(input_high,
2652 input_high,
2653 Operand(1 << HeapNumber::kMantissaBitsInTopWord));
2654 // Shift the mantissa bits to the correct position.
2655 // We don't need to clear non-mantissa bits as they will be shifted away.
2656 // If they weren't, it would mean that the answer is in the 32bit range.
2657 mov(input_high, Operand(input_high, LSL, scratch));
2658
2659 // Replace the shifted bits with bits from the lower mantissa word.
2660 Label pos_shift, shift_done;
2661 rsb(scratch, scratch, Operand(32), SetCC);
2662 b(&pos_shift, ge);
2663
2664 // Negate scratch.
2665 rsb(scratch, scratch, Operand::Zero());
2666 mov(input_low, Operand(input_low, LSL, scratch));
2667 b(&shift_done);
2668
2669 bind(&pos_shift);
2670 mov(input_low, Operand(input_low, LSR, scratch));
2671
2672 bind(&shift_done);
2673 orr(input_high, input_high, Operand(input_low));
2674 // Restore sign if necessary.
2675 cmp(sign, Operand::Zero());
2676 result = sign;
2677 sign = no_reg;
2678 rsb(result, input_high, Operand::Zero(), LeaveCC, ne);
2679 mov(result, input_high, LeaveCC, eq);
2680 bind(&done);
2681 } 2529 }
2682 2530
2683 2531
2684 void MacroAssembler::EmitECMATruncate(Register result, 2532 void MacroAssembler::ECMAToInt32VFP(Register result,
2685 DwVfpRegister double_input, 2533 DwVfpRegister double_input,
2686 DwVfpRegister double_scratch, 2534 DwVfpRegister double_scratch,
2687 Register scratch, 2535 Register scratch,
2688 Register input_high, 2536 Register input_high,
2689 Register input_low) { 2537 Register input_low) {
2690 CpuFeatureScope scope(this, VFP2); 2538 CpuFeatureScope scope(this, VFP2);
2691 ASSERT(!input_high.is(result)); 2539 ASSERT(!input_high.is(result));
2692 ASSERT(!input_low.is(result)); 2540 ASSERT(!input_low.is(result));
2693 ASSERT(!input_low.is(input_high)); 2541 ASSERT(!input_low.is(input_high));
2694 ASSERT(!scratch.is(result) && 2542 ASSERT(!scratch.is(result) &&
2695 !scratch.is(input_high) && 2543 !scratch.is(input_high) &&
2696 !scratch.is(input_low)); 2544 !scratch.is(input_low));
2697 ASSERT(!double_input.is(double_scratch)); 2545 ASSERT(!double_input.is(double_scratch));
2698 2546
2699 Label done; 2547 Label overflow, out_of_range, negate, done;
2700 2548
2701 // Test if the value can be exactly represented as a signed integer. 2549 vmov(input_low, input_high, double_input);
2702 TryDoubleToInt32Exact(result, double_input, double_scratch); 2550 Ubfx(scratch, input_high,
2703 b(eq, &done); 2551 HeapNumber::kExponentShift, HeapNumber::kExponentBits);
2552 // Load scratch with exponent - 1. This is faster than loading
2553 // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.
2554 sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));
2555 // Compare exponent with 31 (compare exponent - 1 with 30).
2556 cmp(scratch, Operand(30));
2557 b(ge, &overflow);
2558 // Exponent is less than 31 so vcvt will never saturate.
2559 // So, just return the result.
2560 vcvt_s32_f64(double_scratch.low(), double_input);
2561 vmov(result, double_scratch.low());
2562 b(&done);
2704 2563
2705 // Check the exception flags. If they are not set, we are done. 2564 bind(&overflow);
2706 // If they are set, it could be because of the conversion above, or because 2565 // If exponent is greater than or equal to 84, the 32 least significant
2707 // they were set before this code. 2566 // bits are 0s (2^84 = 1, 52 significant bits, 32 uncoded bits),
2708 vmrs(scratch); 2567 // the result is 0.
2709 tst(scratch, Operand(kVFPInvalidOpExceptionBit)); 2568 // This test also catches NaNs and infinities, which also return 0.
2710 b(eq, &done); 2569 // Compare exponent with 84 (compare exponent - 1 with 83).
2570 cmp(scratch, Operand(83));
2571 b(ge, &out_of_range);
2711 2572
2712 // Clear cumulative exception flags. 2573 // If we reach this code, 31 <= exponent <= 83.
2713 bic(scratch, scratch, Operand(kVFPInvalidOpExceptionBit)); 2574 // So, we don't have to handle cases where 0 <= exponent <= 20 for
2714 vmsr(scratch); 2575 // which we would need to shift right the high part of the mantissa.
2715 // Try a conversion to a signed integer. 2576 ECMAToInt32Tail(result, scratch, input_high, input_low,
2716 vcvt_s32_f64(double_scratch.low(), double_input); 2577 &out_of_range, &negate, &done);
2717 // Retrieve the FPSCR.
2718 vmrs(scratch);
2719 // Check for invalid conversions (out of range and NaNs).
2720 tst(scratch, Operand(kVFPInvalidOpExceptionBit));
2721 // If we had no exceptions we are done.
2722 b(eq, &done);
2723
2724 // Load the double value and perform a manual truncation.
2725 vmov(input_low, input_high, double_input);
2726 EmitOutOfInt32RangeTruncate(result,
2727 input_high,
2728 input_low,
2729 scratch);
2730 bind(&done);
2731 } 2578 }
2732 2579
2733 2580
2581 void MacroAssembler::ECMAToInt32NoVFP(Register result,
2582 Register scratch,
2583 Register input_high,
2584 Register input_low) {
2585 ASSERT(!result.is(scratch));
2586 ASSERT(!result.is(input_high));
2587 ASSERT(!result.is(input_low));
2588 ASSERT(!scratch.is(input_high));
2589 ASSERT(!scratch.is(input_low));
2590 ASSERT(!input_high.is(input_low));
2591
2592 Label both, out_of_range, negate, done;
2593
2594 Ubfx(scratch, input_high,
2595 HeapNumber::kExponentShift, HeapNumber::kExponentBits);
2596 // Load scratch with exponent - 1. This is faster than loading
2597 // with exponent because Bias + 1 = 1024 which is an *ARM* immediate value.
2598 sub(scratch, scratch, Operand(HeapNumber::kExponentBias + 1));
2599 // If exponent is negative, 0 < input < 1, the result is 0.
2600 // If exponent is greater than or equal to 84, the 32 least significant
2601 // bits are 0s (2^84 = 1, 52 significant bits, 32 uncoded bits),
2602 // the result is 0.
2603 // This test also catches NaNs and infinities, which also return 0.
2604 // Compare exponent with 84 (compare exponent - 1 with 83).
2605 cmp(scratch, Operand(83));
2606 // We do an unsigned comparison so negative numbers are treated as big
2607 // positive numbers and the two tests above are done in one test.
2608 b(hs, &out_of_range);
2609
2610 // Load scratch with 20 - exponent (load with 19 - (exponent - 1)).
2611 rsb(scratch, scratch, Operand(19), SetCC);
2612 b(mi, &both);
2613
2614 // 0 <= exponent <= 20, shift only input_high.
2615 // Scratch contains: 20 - exponent.
2616 Ubfx(result, input_high,
2617 0, HeapNumber::kMantissaBitsInTopWord);
2618 orr(result, result, Operand(1 << HeapNumber::kMantissaBitsInTopWord));
2619 mov(result, Operand(result, LSR, scratch));
2620 b(&negate);
2621
2622 bind(&both);
2623 // Restore scratch to exponent - 1 to be consistent with ECMAToInt32VFP.
2624 rsb(scratch, scratch, Operand(19));
2625 ECMAToInt32Tail(result, scratch, input_high, input_low,
2626 &out_of_range, &negate, &done);
2627 }
2628
2629
2630 void MacroAssembler::ECMAToInt32Tail(Register result,
2631 Register scratch,
2632 Register input_high,
2633 Register input_low,
2634 Label* out_of_range,
2635 Label* negate,
2636 Label* done) {
2637 Label only_low;
2638
2639 // On entry, scratch contains exponent - 1.
2640 // Load scratch with 52 - exponent (load with 51 - (exponent - 1)).
2641 rsb(scratch, scratch, Operand(51), SetCC);
2642 b(ls, &only_low);
2643 // 21 <= exponent <= 51, shift input_low and input_high
2644 // to generate the result.
2645 mov(input_low, Operand(input_low, LSR, scratch));
2646 // Scratch contains: 52 - exponent.
2647 // We need: exponent - 20.
2648 // So we use: 32 - scratch = 32 - 52 + exponent = exponent - 20.
2649 rsb(scratch, scratch, Operand(32));
2650 Ubfx(result, input_high,
2651 0, HeapNumber::kMantissaBitsInTopWord);
2652 orr(result, result, Operand(1 << HeapNumber::kMantissaBitsInTopWord));
ulan 2013/03/11 15:11:03 Maybe restore the comment about implicit 1? // Set
Rodolph Perfetta 2013/03/12 11:24:45 Done.
2653 orr(result, input_low, Operand(result, LSL, scratch));
2654 b(negate);
2655
2656 bind(out_of_range);
2657 mov(result, Operand::Zero());
2658 b(done);
2659
2660 bind(&only_low);
2661 // 52 <= exponent <= 83, shift only input_low.
2662 // On entry, scratch contains: 52 - exponent.
2663 rsb(scratch, scratch, Operand::Zero());
2664 mov(result, Operand(input_low, LSL, scratch));
2665
2666 bind(negate);
2667 // If input was positive, input_high ASR 31 equals 0 and
2668 // input_high LSR 31 equals zero.
2669 // New result = (result eor 0) + 0 = result.
2670 // If the input was negative, we have to negate the result.
2671 // Input_high ASR 31 equals 0xffffffff and input_high LSR 31 equals 1.
2672 // New result = (result eor 0xffffffff) + 1 = 0 - result.
2673 eor(result, result, Operand(input_high, ASR, 31));
ulan 2013/03/11 15:11:03 This is an awesome trick :)
2674 add(result, result, Operand(input_high, LSR, 31));
2675
2676 bind(done);
2677 }
2678
2679
2734 void MacroAssembler::GetLeastBitsFromSmi(Register dst, 2680 void MacroAssembler::GetLeastBitsFromSmi(Register dst,
2735 Register src, 2681 Register src,
2736 int num_least_bits) { 2682 int num_least_bits) {
2737 if (CpuFeatures::IsSupported(ARMv7) && !predictable_code_size()) { 2683 if (CpuFeatures::IsSupported(ARMv7) && !predictable_code_size()) {
2738 ubfx(dst, src, kSmiTagSize, num_least_bits); 2684 ubfx(dst, src, kSmiTagSize, num_least_bits);
2739 } else { 2685 } else {
2740 mov(dst, Operand(src, ASR, kSmiTagSize)); 2686 mov(dst, Operand(src, ASR, kSmiTagSize));
2741 and_(dst, dst, Operand((1 << num_least_bits) - 1)); 2687 and_(dst, dst, Operand((1 << num_least_bits) - 1));
2742 } 2688 }
2743 } 2689 }
(...skipping 1277 matching lines...) Expand 10 before | Expand all | Expand 10 after
4021 void CodePatcher::EmitCondition(Condition cond) { 3967 void CodePatcher::EmitCondition(Condition cond) {
4022 Instr instr = Assembler::instr_at(masm_.pc_); 3968 Instr instr = Assembler::instr_at(masm_.pc_);
4023 instr = (instr & ~kCondMask) | cond; 3969 instr = (instr & ~kCondMask) | cond;
4024 masm_.emit(instr); 3970 masm_.emit(instr);
4025 } 3971 }
4026 3972
4027 3973
4028 } } // namespace v8::internal 3974 } } // namespace v8::internal
4029 3975
4030 #endif // V8_TARGET_ARCH_ARM 3976 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698