Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: src/ia32/code-stubs-ia32.cc

Issue 8749002: Implement Math.pow using FPU instructions and inline it in crankshaft (ia32). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: remove unnecessary heap number check in crankshaft code Created 9 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 2920 matching lines...) Expand 10 before | Expand all | Expand 10 after
2931 } 2931 }
2932 2932
2933 2933
2934 void FloatingPointHelper::CheckFloatOperandsAreInt32(MacroAssembler* masm, 2934 void FloatingPointHelper::CheckFloatOperandsAreInt32(MacroAssembler* masm,
2935 Label* non_int32) { 2935 Label* non_int32) {
2936 return; 2936 return;
2937 } 2937 }
2938 2938
2939 2939
2940 void MathPowStub::Generate(MacroAssembler* masm) { 2940 void MathPowStub::Generate(MacroAssembler* masm) {
2941 // Registers are used as follows:
2942 // edx = base
2943 // eax = exponent
2944 // ecx = temporary, result
2945
2946 CpuFeatures::Scope use_sse2(SSE2); 2941 CpuFeatures::Scope use_sse2(SSE2);
2947 Label allocate_return, call_runtime; 2942 Factory* factory = masm->isolate()->factory();
2948 2943 Label double_int_runtime, generic_runtime, done;
2949 // Load input parameters. 2944 Label base_is_smi, unpack_exponent, exponent_not_smi, int_exponent;
2950 __ mov(edx, Operand(esp, 2 * kPointerSize));
2951 __ mov(eax, Operand(esp, 1 * kPointerSize));
2952
2953 // Save 1 in xmm3 - we need this several times later on. 2945 // Save 1 in xmm3 - we need this several times later on.
2954 __ mov(ecx, Immediate(1)); 2946 __ mov(ecx, Immediate(1));
2955 __ cvtsi2sd(xmm3, ecx); 2947 __ cvtsi2sd(xmm3, ecx);
2956 2948
2957 Label exponent_nonsmi; 2949 switch (exponent_type_) {
2958 Label base_nonsmi; 2950 case ON_STACK:
2959 // If the exponent is a heap number go to that specific case. 2951 // Load input parameters from stack
2960 __ JumpIfNotSmi(eax, &exponent_nonsmi); 2952 __ mov(edx, Operand(esp, 2 * kPointerSize));
2961 __ JumpIfNotSmi(edx, &base_nonsmi); 2953 __ mov(eax, Operand(esp, 1 * kPointerSize));
2962 2954 // edx: base (smi or heap number)
2963 // Optimized version when both exponent and base are smis. 2955 // eax: exponent (smi or heap number)
2964 Label powi; 2956 __ JumpIfSmi(edx, &base_is_smi, Label::kNear);
2965 __ SmiUntag(edx); 2957 __ cmp(FieldOperand(edx, HeapObject::kMapOffset),
2966 __ cvtsi2sd(xmm0, edx); 2958 factory->heap_number_map());
2967 __ jmp(&powi); 2959 __ j(not_equal, &generic_runtime);
2968 // exponent is smi and base is a heapnumber. 2960
2969 __ bind(&base_nonsmi); 2961 // Check base for NaN or +/-Infinity
2970 Factory* factory = masm->isolate()->factory(); 2962 __ mov(ecx, FieldOperand(edx, HeapNumber::kExponentOffset));
2971 __ cmp(FieldOperand(edx, HeapObject::kMapOffset), 2963 __ and_(ecx, HeapNumber::kExponentMask);
2972 factory->heap_number_map()); 2964 __ cmp(ecx, Immediate(HeapNumber::kExponentMask));
2973 __ j(not_equal, &call_runtime); 2965 __ j(greater_equal, &generic_runtime);
ulan 2011/12/01 18:11:26 j(equal) seems to be sufficient as ecx cannot be g[reater than HeapNumber::kExponentMask after the and_ with that mask above]
2974 2966 __ movdbl(xmm1, FieldOperand(edx, HeapNumber::kValueOffset));
2975 __ movdbl(xmm0, FieldOperand(edx, HeapNumber::kValueOffset)); 2967
2976 2968 __ jmp(&unpack_exponent, Label::kNear);
ulan 2011/12/01 18:11:26 Move the jump before the empty line?
2977 // Optimized version of pow if exponent is a smi. 2969 __ bind(&base_is_smi);
2978 // xmm0 contains the base. 2970 __ SmiUntag(edx);
2979 __ bind(&powi); 2971 __ cvtsi2sd(xmm1, edx);
2980 __ SmiUntag(eax); 2972 __ bind(&unpack_exponent);
2981 2973 // Fall through is intended.
2974 case TAGGED:
2975 // xmm1: base as double
2976 // eax: exponent (smi or heap number)
2977 __ JumpIfNotSmi(eax, &exponent_not_smi, Label::kNear);
2978 __ SmiUntag(eax);
2979 __ jmp(&int_exponent);
2980 __ bind(&exponent_not_smi);
2981 if (exponent_type_ == ON_STACK) {
ulan 2011/12/01 18:11:26 I wonder if copy-pasting and keeping the ON_STACK
2982 // Heap number check not necessary in optimized code as we will have
2983 // already deoptimized if eax was neither smi nor heap number.
2984 __ cmp(FieldOperand(eax, HeapObject::kMapOffset),
2985 factory->heap_number_map());
2986 __ j(not_equal, &generic_runtime);
2987 }
2988 __ movdbl(xmm2, FieldOperand(eax, HeapNumber::kValueOffset));
2989 break;
2990 case INTEGER:
2991 // xmm1: base as double
2992 // eax: exponent as untagged integer
2993 case DOUBLE:
2994 // xmm1: base as double
2995 // xmm2: exponent as double
2996 // Check base for NaN or +/-Infinity
2997 if (CpuFeatures::IsSupported(SSE4_1)) {
2998 __ extractps(ecx, xmm1, 4 * kBitsPerByte);
ulan 2011/12/01 18:11:26 Since we already use HeapNumber::kExponentMask bel
2999 } else {
3000 __ movsd(xmm4, xmm1);
3001 __ psrlq(xmm4, 4 * kBitsPerByte);
3002 __ movd(ecx, xmm4);
3003 }
3004 __ and_(ecx, HeapNumber::kExponentMask);
3005 __ cmp(ecx, Immediate(HeapNumber::kExponentMask));
3006 __ j(greater_equal, &generic_runtime);
3007 break;
3008 default:
3009 UNREACHABLE();
3010 }
3011 if (exponent_type_ != INTEGER) {
3012 Label not_minus_half, fast_power;
3013 // xmm1: base as double that is not +/- Infinity or NaN
3014 // xmm2: exponent as double
3015 // Detect integer exponents stored as double.
3016 __ cvttsd2si(eax, Operand(xmm2));
3017 __ cmp(eax, Immediate(0x80000000)); // Skip to runtime if possibly NaN.
3018 __ j(equal, &generic_runtime);
3019 __ cvtsi2sd(xmm4, eax);
3020 __ ucomisd(xmm2, xmm4);
3021 __ j(equal, &int_exponent);
3022
3023 // Detect square root case.
3024 // Test for -0.5.
3025 // Load xmm4 with -0.5.
3026 __ mov(ecx, Immediate(0xBF000000));
3027 __ movd(xmm4, ecx);
3028 __ cvtss2sd(xmm4, xmm4);
3029 // xmm3 now has -0.5.
ulan 2011/12/01 18:11:26 xmm4 now has -0.5.
3030 __ ucomisd(xmm4, xmm2);
3031 __ j(not_equal, &not_minus_half, Label::kNear);
3032
3033 // Calculates reciprocal of square root.
3034 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.
3035 __ xorps(xmm2, xmm2);
3036 __ addsd(xmm2, xmm1);
3037 __ sqrtsd(xmm2, xmm2);
3038 __ divsd(xmm3, xmm2);
3039 __ jmp(&done);
3040 __ extractps(ecx, xmm1, 4 * kBitsPerByte);
ulan 2011/12/01 18:11:26 Redundant instruction.
3041 // Test for 0.5.
3042 __ bind(&not_minus_half);
3043 // Load xmm4 with 0.5.
3044 // Since xmm3 is 1 and xmm4 is -0.5 this is simply xmm4 + xmm3.
3045 __ addsd(xmm4, xmm3);
3046 // xmm4 now has 0.5.
3047 __ ucomisd(xmm4, xmm2);
3048 __ j(not_equal, &fast_power, Label::kNear);
3049 // Calculates square root.
3050 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.
3051 __ xorps(xmm4, xmm4);
3052 __ addsd(xmm4, xmm1);
3053 __ sqrtsd(xmm3, xmm4);
3054 __ jmp(&done);
3055
3056 // Using FPU instructions to calculate power.
3057 Label fast_power_failed;
3058 __ bind(&fast_power);
3059 // Transfer (B)ase and (E)xponent onto the FPU register stack.
3060 __ sub(esp, Immediate(kDoubleSize));
3061 __ movdbl(Operand(esp, 0), xmm2);
3062 __ fld_d(Operand(esp, 0)); // E
3063 __ movdbl(Operand(esp, 0), xmm1);
3064 __ fld_d(Operand(esp, 0)); // B, E
3065
3066 // Exponent is in st(1) and base is in st(0)
3067 // B ^ E = (2^(E * log2(B)) - 1) + 1 = (2^X - 1) + 1 for X = E * log2(B)
3068 // FYL2X calculates st(1) * log2(st(0))
3069 __ fyl2x(); // X
3070 __ fld(0); // X, X
3071 __ frndint(); // rnd(X), X
3072 __ fsub(1); // rnd(X), X-rnd(X)
3073 __ fxch(1); // X - rnd(X), rnd(X)
3074 // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1
3075 __ f2xm1(); // 2^(X-rnd(X)) - 1, rnd(X)
3076 __ fld1(); // 1, 2^(X-rnd(X)) - 1, rnd(X)
3077 __ faddp(1); // 1, 2^(X-rnd(X)), rnd(X)
3078 // FSCALE calculates st(0) * 2^st(1)
3079 __ fscale(); // 2^X, rnd(X)
3080 __ fstp(1);
3081 // Bail out to runtime in case of exceptions in the status word.
3082 __ fnstsw_ax();
3083 __ test_b(eax, 0x5F);
3084 __ j(not_zero, &fast_power_failed, Label::kNear);
3085 __ fstp_d(Operand(esp, 0));
3086 __ movdbl(xmm3, Operand(esp, 0));
3087 __ add(esp, Immediate(kDoubleSize));
3088 __ jmp(&done);
3089
3090 __ bind(&fast_power_failed);
3091 __ fninit();
3092 __ add(esp, Immediate(kDoubleSize));
3093 __ jmp(&generic_runtime);
3094 }
3095
3096 // Calculate power with integer exponent.
3097 __ bind(&int_exponent);
3098 // xmm0: base as double that is not +/- Infinity or NaN
ulan 2011/12/01 18:11:26 xmm0 -> xmm1
3099 // eax: exponent as untagged integer
2982 // Save exponent in base as we need to check if exponent is negative later. 3100 // Save exponent in base as we need to check if exponent is negative later.
ulan 2011/12/01 18:11:26 base -> ecx in the comment
2983 // We know that base and exponent are in different registers. 3101 // We know that base and exponent are in different registers.
2984 __ mov(edx, eax); 3102 __ mov(ecx, eax); // Back up exponent.
3103 __ movsd(xmm4, xmm1); // Back up base.
3104 __ movsd(xmm2, xmm3); // Load xmm2 with 1.
2985 3105
2986 // Get absolute value of exponent. 3106 // Get absolute value of exponent.
2987 Label no_neg; 3107 Label no_neg, while_true, no_multiply;
2988 __ cmp(eax, 0); 3108 __ cmp(eax, 0);
2989 __ j(greater_equal, &no_neg, Label::kNear); 3109 __ j(greater_equal, &no_neg, Label::kNear);
2990 __ neg(eax); 3110 __ neg(eax);
2991 __ bind(&no_neg); 3111 __ bind(&no_neg);
2992 3112
2993 // Load xmm1 with 1.
2994 __ movsd(xmm1, xmm3);
2995 Label while_true;
2996 Label no_multiply;
2997
2998 __ bind(&while_true); 3113 __ bind(&while_true);
2999 __ shr(eax, 1); 3114 __ shr(eax, 1);
3000 __ j(not_carry, &no_multiply, Label::kNear); 3115 __ j(not_carry, &no_multiply, Label::kNear);
3001 __ mulsd(xmm1, xmm0); 3116 __ mulsd(xmm3, xmm1);
3002 __ bind(&no_multiply); 3117 __ bind(&no_multiply);
3003 __ mulsd(xmm0, xmm0); 3118 __ mulsd(xmm1, xmm1);
3004 __ j(not_zero, &while_true); 3119 __ j(not_zero, &while_true);
3005 3120
3006 // base has the original value of the exponent - if the exponent is 3121 // base has the original value of the exponent - if the exponent is
ulan 2011/12/01 18:11:26 base -> ecx in the comment
3007 // negative return 1/result. 3122 // negative return 1/result.
3008 __ test(edx, edx); 3123 __ test(ecx, ecx);
3009 __ j(positive, &allocate_return); 3124 __ j(positive, &done);
3010 // Special case if xmm1 has reached infinity. 3125 // Special case if xmm3 has reached infinity.
3011 __ mov(ecx, Immediate(0x7FB00000)); 3126 __ mov(eax, Immediate(0x7F800000));
3012 __ movd(xmm0, ecx); 3127 __ movd(xmm1, eax);
3013 __ cvtss2sd(xmm0, xmm0); 3128 __ cvtss2sd(xmm1, xmm1);
3014 __ ucomisd(xmm0, xmm1); 3129 __ ucomisd(xmm1, xmm3);
3015 __ j(equal, &call_runtime); 3130 __ j(equal, &double_int_runtime);
3016 __ divsd(xmm3, xmm1); 3131 __ divsd(xmm2, xmm3);
3017 __ movsd(xmm1, xmm3); 3132 __ movsd(xmm3, xmm2);
3018 __ jmp(&allocate_return); 3133
3019 3134 // Returning or bailing out.
3020 // exponent (or both) is a heapnumber - no matter what we should now work 3135 if (exponent_type_ == ON_STACK) {
3021 // on doubles. 3136 // We expect the result as heap number in eax.
3022 __ bind(&exponent_nonsmi); 3137 __ bind(&done);
3023 __ cmp(FieldOperand(eax, HeapObject::kMapOffset), 3138 // xmm1: result
ulan 2011/12/01 18:11:26 xmm1 -> xmm3 in the comment.
3024 factory->heap_number_map()); 3139 __ AllocateHeapNumber(eax, ecx, edx, &generic_runtime);
3025 __ j(not_equal, &call_runtime); 3140 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm3);
3026 __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset)); 3141 __ ret(2 * kPointerSize);
3027 // Test if exponent is nan. 3142
3028 __ ucomisd(xmm1, xmm1); 3143 // The arguments are still on the stack.
3029 __ j(parity_even, &call_runtime); 3144 __ bind(&generic_runtime);
3030 3145 __ bind(&double_int_runtime);
3031 Label base_not_smi; 3146 __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
3032 Label handle_special_cases; 3147 } else {
3033 __ JumpIfNotSmi(edx, &base_not_smi, Label::kNear); 3148 __ jmp(&done);
3034 __ SmiUntag(edx); 3149
3035 __ cvtsi2sd(xmm0, edx); 3150 Label return_from_runtime;
3036 __ jmp(&handle_special_cases, Label::kNear); 3151 StubRuntimeCallHelper callhelper;
3037 3152 __ bind(&generic_runtime);
3038 __ bind(&base_not_smi); 3153 // xmm1: base
3039 __ cmp(FieldOperand(edx, HeapObject::kMapOffset), 3154 // xmm2: exponent
3040 factory->heap_number_map()); 3155 {
3041 __ j(not_equal, &call_runtime); 3156 AllowExternalCallThatCantCauseGC scope(masm);
3042 __ mov(ecx, FieldOperand(edx, HeapNumber::kExponentOffset)); 3157 __ PrepareCallCFunction(4, eax);
3043 __ and_(ecx, HeapNumber::kExponentMask); 3158 __ movdbl(Operand(esp, 0 * kDoubleSize), xmm1);
3044 __ cmp(ecx, Immediate(HeapNumber::kExponentMask)); 3159 __ movdbl(Operand(esp, 1 * kDoubleSize), xmm2);
3045 // base is NaN or +/-Infinity 3160 __ CallCFunction(
3046 __ j(greater_equal, &call_runtime); 3161 ExternalReference::power_double_double_function(masm->isolate()), 4);
3047 __ movdbl(xmm0, FieldOperand(edx, HeapNumber::kValueOffset)); 3162 }
3048 3163 __ jmp(&return_from_runtime, Label::kNear);
3049 // base is in xmm0 and exponent is in xmm1. 3164
3050 __ bind(&handle_special_cases); 3165 __ bind(&double_int_runtime);
3051 Label not_minus_half; 3166 // xmm4: base
3052 // Test for -0.5. 3167 // ecx: exponent
3053 // Load xmm2 with -0.5. 3168 {
3054 __ mov(ecx, Immediate(0xBF000000)); 3169 __ PrepareCallCFunction(4, eax);
3055 __ movd(xmm2, ecx); 3170 __ movdbl(Operand(esp, 0 * kDoubleSize), xmm4);
3056 __ cvtss2sd(xmm2, xmm2); 3171 __ mov(Operand(esp, 1 * kDoubleSize), ecx);
3057 // xmm2 now has -0.5. 3172 AllowExternalCallThatCantCauseGC scope(masm);
3058 __ ucomisd(xmm2, xmm1); 3173 __ CallCFunction(
3059 __ j(not_equal, &not_minus_half, Label::kNear); 3174 ExternalReference::power_double_int_function(masm->isolate()), 4);
3060 3175 }
3061 // Calculates reciprocal of square root. 3176
3062 // sqrtsd returns -0 when input is -0. ECMA spec requires +0. 3177 __ bind(&return_from_runtime);
3063 __ xorps(xmm1, xmm1); 3178 // Return value is in st(0) on ia32.
3064 __ addsd(xmm1, xmm0); 3179 // Store it into the (fixed) result register.
3065 __ sqrtsd(xmm1, xmm1); 3180 __ sub(esp, Immediate(kDoubleSize));
3066 __ divsd(xmm3, xmm1); 3181 __ fstp_d(Operand(esp, 0));
3067 __ movsd(xmm1, xmm3); 3182 __ movdbl(xmm3, Operand(esp, 0));
3068 __ jmp(&allocate_return); 3183 __ add(esp, Immediate(kDoubleSize));
3069 3184
3070 // Test for 0.5. 3185 // We expect the result in xmm3.
3071 __ bind(&not_minus_half); 3186 __ bind(&done);
3072 // Load xmm2 with 0.5. 3187 __ ret(0);
3073 // Since xmm3 is 1 and xmm2 is -0.5 this is simply xmm2 + xmm3. 3188 }
3074 __ addsd(xmm2, xmm3);
3075 // xmm2 now has 0.5.
3076 __ ucomisd(xmm2, xmm1);
3077 __ j(not_equal, &call_runtime);
3078 // Calculates square root.
3079 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.
3080 __ xorps(xmm1, xmm1);
3081 __ addsd(xmm1, xmm0);
3082 __ sqrtsd(xmm1, xmm1);
3083
3084 __ bind(&allocate_return);
3085 __ AllocateHeapNumber(ecx, eax, edx, &call_runtime);
3086 __ movdbl(FieldOperand(ecx, HeapNumber::kValueOffset), xmm1);
3087 __ mov(eax, ecx);
3088 __ ret(2 * kPointerSize);
3089
3090 __ bind(&call_runtime);
3091 __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
3092 } 3189 }
3093 3190
3094 3191
3095 void ArgumentsAccessStub::GenerateReadElement(MacroAssembler* masm) { 3192 void ArgumentsAccessStub::GenerateReadElement(MacroAssembler* masm) {
3096 // The key is in edx and the parameter count is in eax. 3193 // The key is in edx and the parameter count is in eax.
3097 3194
3098 // The displacement is used for skipping the frame pointer on the 3195 // The displacement is used for skipping the frame pointer on the
3099 // stack. It is the offset of the last parameter (if any) relative 3196 // stack. It is the offset of the last parameter (if any) relative
3100 // to the frame pointer. 3197 // to the frame pointer.
3101 static const int kDisplacement = 1 * kPointerSize; 3198 static const int kDisplacement = 1 * kPointerSize;
(...skipping 4069 matching lines...) Expand 10 before | Expand all | Expand 10 after
7171 false); 7268 false);
7172 __ pop(edx); 7269 __ pop(edx);
7173 __ ret(0); 7270 __ ret(0);
7174 } 7271 }
7175 7272
7176 #undef __ 7273 #undef __
7177 7274
7178 } } // namespace v8::internal 7275 } } // namespace v8::internal
7179 7276
7180 #endif // V8_TARGET_ARCH_IA32 7277 #endif // V8_TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « src/ia32/assembler-ia32.cc ('k') | src/ia32/disasm-ia32.cc » ('j') | test/mjsunit/math-pow.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698