src/ia32/code-stubs-ia32.cc - Issue 8749002: Implement Math.pow using FPU instructions and inline it in crankshaft (ia32).

Side by Side Diff: src/ia32/code-stubs-ia32.cc

Issue 8749002: Implement Math.pow using FPU instructions and inline it in crankshaft (ia32). (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: remove unnecessary heap number check in crankshaft code Created 9 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 2920 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2931 }	2931 }

2932	2932

2933	2933

2934 void FloatingPointHelper::CheckFloatOperandsAreInt32(MacroAssembler* masm,	2934 void FloatingPointHelper::CheckFloatOperandsAreInt32(MacroAssembler* masm,

2935 Label* non_int32) {	2935 Label* non_int32) {

2936 return;	2936 return;

2937 }	2937 }

2938	2938

2939	2939

2940 void MathPowStub::Generate(MacroAssembler* masm) {	2940 void MathPowStub::Generate(MacroAssembler* masm) {

2941 // Registers are used as follows:

2942 // edx = base

2943 // eax = exponent

2944 // ecx = temporary, result

2945

2946 CpuFeatures::Scope use_sse2(SSE2);	2941 CpuFeatures::Scope use_sse2(SSE2);

2947 Label allocate_return, call_runtime;	2942 Factory* factory = masm->isolate()->factory();

2948	2943 Label double_int_runtime, generic_runtime, done;

2949 // Load input parameters.	2944 Label base_is_smi, unpack_exponent, exponent_not_smi, int_exponent;

2950 __ mov(edx, Operand(esp, 2 * kPointerSize));

2951 __ mov(eax, Operand(esp, 1 * kPointerSize));

2952

2953 // Save 1 in xmm3 - we need this several times later on.	2945 // Save 1 in xmm3 - we need this several times later on.

2954 __ mov(ecx, Immediate(1));	2946 __ mov(ecx, Immediate(1));

2955 __ cvtsi2sd(xmm3, ecx);	2947 __ cvtsi2sd(xmm3, ecx);

2956	2948

2957 Label exponent_nonsmi;	2949 switch (exponent_type_) {

2958 Label base_nonsmi;	2950 case ON_STACK:

2959 // If the exponent is a heap number go to that specific case.	2951 // Load input parameters from stack

2960 __ JumpIfNotSmi(eax, &exponent_nonsmi);	2952 __ mov(edx, Operand(esp, 2 * kPointerSize));

2961 __ JumpIfNotSmi(edx, &base_nonsmi);	2953 __ mov(eax, Operand(esp, 1 * kPointerSize));

2962	2954 // edx: base (smi or heap number)

2963 // Optimized version when both exponent and base are smis.	2955 // eax: exponent (smi or heap number)

2964 Label powi;	2956 __ JumpIfSmi(edx, &base_is_smi, Label::kNear);

2965 __ SmiUntag(edx);	2957 __ cmp(FieldOperand(edx, HeapObject::kMapOffset),

2966 __ cvtsi2sd(xmm0, edx);	2958 factory->heap_number_map());

2967 __ jmp(&powi);	2959 __ j(not_equal, &generic_runtime);

2968 // exponent is smi and base is a heapnumber.	2960

2969 __ bind(&base_nonsmi);	2961 // Check base for NaN or +/-Infinity

2970 Factory* factory = masm->isolate()->factory();	2962 __ mov(ecx, FieldOperand(edx, HeapNumber::kExponentOffset));

2971 __ cmp(FieldOperand(edx, HeapObject::kMapOffset),	2963 __ and_(ecx, HeapNumber::kExponentMask);

2972 factory->heap_number_map());	2964 __ cmp(ecx, Immediate(HeapNumber::kExponentMask));

2973 __ j(not_equal, &call_runtime);	2965 __ j(greater_equal, &generic_runtime);
	ulan 2011/12/01 18:11:26 j(equal) seems to be sufficient as ecx cannot be greater than kExponentMask.
2974	2966 __ movdbl(xmm1, FieldOperand(edx, HeapNumber::kValueOffset));

2975 __ movdbl(xmm0, FieldOperand(edx, HeapNumber::kValueOffset));	2967

2976	2968 __ jmp(&unpack_exponent, Label::kNear);
	ulan 2011/12/01 18:11:26 Move the jump before the empty line?
2977 // Optimized version of pow if exponent is a smi.	2969 __ bind(&base_is_smi);

2978 // xmm0 contains the base.	2970 __ SmiUntag(edx);

2979 __ bind(&powi);	2971 __ cvtsi2sd(xmm1, edx);

2980 __ SmiUntag(eax);	2972 __ bind(&unpack_exponent);

2981	2973 // Fall through is intended.

	2974 case TAGGED:

	2975 // xmm1: base as double

	2976 // eax: exponent (smi or heap number)

	2977 __ JumpIfNotSmi(eax, &exponent_not_smi, Label::kNear);

	2978 __ SmiUntag(eax);

	2979 __ jmp(&int_exponent);

	2980 __ bind(&exponent_not_smi);

	2981 if (exponent_type_ == ON_STACK) {
	ulan 2011/12/01 18:11:26 I wonder if copy-pasting and keeping the ON_STACK and TAGGED cases disjoint would be better.
	2982 // Heap number check not necessary in optimized code as we will have

	2983 // already deoptimized if eax was neither smi nor heap number.

	2984 __ cmp(FieldOperand(eax, HeapObject::kMapOffset),

	2985 factory->heap_number_map());

	2986 __ j(not_equal, &generic_runtime);

	2987 }

	2988 __ movdbl(xmm2, FieldOperand(eax, HeapNumber::kValueOffset));

	2989 break;

	2990 case INTEGER:

	2991 // xmm1: base as double

	2992 // eax: exponent as untagged integer

	2993 case DOUBLE:

	2994 // xmm1: base as double

	2995 // xmm2: exponent as double

	2996 // Check base for NaN or +/-Infinity

	2997 if (CpuFeatures::IsSupported(SSE4_1)) {

	2998 __ extractps(ecx, xmm1, 4 * kBitsPerByte);
	ulan 2011/12/01 18:11:26 Since we already use HeapNumber::kExponentMask below, maybe replace 4 by (HeapNumber::kExponentOffset - HeapNumber::kMantissaOffset)?
	2999 } else {

	3000 __ movsd(xmm4, xmm1);

	3001 __ psrlq(xmm4, 4 * kBitsPerByte);

	3002 __ movd(ecx, xmm4);

	3003 }

	3004 __ and_(ecx, HeapNumber::kExponentMask);

	3005 __ cmp(ecx, Immediate(HeapNumber::kExponentMask));

	3006 __ j(greater_equal, &generic_runtime);

	3007 break;

	3008 default:

	3009 UNREACHABLE();

	3010 }

	3011 if (exponent_type_ != INTEGER) {

	3012 Label not_minus_half, fast_power;

	3013 // xmm1: base as double that is not +/- Infinity or NaN

	3014 // xmm2: exponent as double

	3015 // Detect integer exponents stored as double.

	3016 __ cvttsd2si(eax, Operand(xmm2));

	3017 __ cmp(eax, Immediate(0x80000000)); // Skip to runtime if possibly NaN.

	3018 __ j(equal, &generic_runtime);

	3019 __ cvtsi2sd(xmm4, eax);

	3020 __ ucomisd(xmm2, xmm4);

	3021 __ j(equal, &int_exponent);

	3022

	3023 // Detect square root case.

	3024 // Test for -0.5.

	3025 // Load xmm4 with -0.5.

	3026 __ mov(ecx, Immediate(0xBF000000));

	3027 __ movd(xmm4, ecx);

	3028 __ cvtss2sd(xmm4, xmm4);

	3029 // xmm3 now has -0.5.
	ulan 2011/12/01 18:11:26 xmm4 now has -0.5.
	3030 __ ucomisd(xmm4, xmm2);

	3031 __ j(not_equal, &not_minus_half, Label::kNear);

	3032

	3033 // Calculates reciprocal of square root.eax

	3034 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.

	3035 __ xorps(xmm2, xmm2);

	3036 __ addsd(xmm2, xmm1);

	3037 __ sqrtsd(xmm2, xmm2);

	3038 __ divsd(xmm3, xmm2);

	3039 __ jmp(&done);

	3040 __ extractps(ecx, xmm1, 4 * kBitsPerByte);
	ulan 2011/12/01 18:11:26 Redundant instruction.
	3041 // Test for 0.5.

	3042 __ bind(&not_minus_half);

	3043 // Load xmm2 with 0.5.

	3044 // Since xmm3 is 1 and xmm4 is -0.5 this is simply xmm4 + xmm3.

	3045 __ addsd(xmm4, xmm3);

	3046 // xmm2 now has 0.5.

	3047 __ ucomisd(xmm4, xmm2);

	3048 __ j(not_equal, &fast_power, Label::kNear);

	3049 // Calculates square root.

	3050 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.

	3051 __ xorps(xmm4, xmm4);

	3052 __ addsd(xmm4, xmm1);

	3053 __ sqrtsd(xmm3, xmm4);

	3054 __ jmp(&done);

	3055

	3056 // Using FPU instructions to calculate power.

	3057 Label fast_power_failed;

	3058 __ bind(&fast_power);

	3059 // Transfer (B)ase and (E)xponent onto the FPU register stack.

	3060 __ sub(esp, Immediate(kDoubleSize));

	3061 __ movdbl(Operand(esp, 0), xmm2);

	3062 __ fld_d(Operand(esp, 0)); // E

	3063 __ movdbl(Operand(esp, 0), xmm1);

	3064 __ fld_d(Operand(esp, 0)); // B, E

	3065

	3066 // Exponent is in st(1) and base is in st(0)

	3067 // B ^ E = (2^(E * log2(B)) - 1) + 1 = (2^X - 1) + 1 for X = E * log2(B)

	3068 // FYL2X calculates st(1) * log2(st(0))

	3069 __ fyl2x(); // X

	3070 __ fld(0); // X, X

	3071 __ frndint(); // rnd(X), X

	3072 __ fsub(1); // rnd(X), X-rnd(X)

	3073 __ fxch(1); // X - rnd(X), rnd(X)

	3074 // F2XM1 calculates 2^st(0) - 1 for -1 < st(0) < 1

	3075 __ f2xm1(); // 2^(X-rnd(X)) - 1, rnd(X)

	3076 __ fld1(); // 1, 2^(X-rnd(X)) - 1, rnd(X)

	3077 __ faddp(1); // 1, 2^(X-rnd(X)), rnd(X)

	3078 // FSCALE calculates st(0) * 2^st(1)

	3079 __ fscale(); // 2^X, rnd(X)

	3080 __ fstp(1);

	3081 // Bail out to runtime in case of exceptions in the status word.

	3082 __ fnstsw_ax();

	3083 __ test_b(eax, 0x5F);

	3084 __ j(not_zero, &fast_power_failed, Label::kNear);

	3085 __ fstp_d(Operand(esp, 0));

	3086 __ movdbl(xmm3, Operand(esp, 0));

	3087 __ add(esp, Immediate(kDoubleSize));

	3088 __ jmp(&done);

	3089

	3090 __ bind(&fast_power_failed);

	3091 __ fninit();

	3092 __ add(esp, Immediate(kDoubleSize));

	3093 __ jmp(&generic_runtime);

	3094 }

	3095

	3096 // Calculate power with integer exponent.

	3097 __ bind(&int_exponent);

	3098 // xmm0: base as double that is not +/- Infinity or NaN
	ulan 2011/12/01 18:11:26 xmm0 -> xmm1
	3099 // eax: exponent as untagged integer

2982 // Save exponent in base as we need to check if exponent is negative later.	3100 // Save exponent in base as we need to check if exponent is negative later.
	ulan 2011/12/01 18:11:26 base -> ecx in the comment
2983 // We know that base and exponent are in different registers.	3101 // We know that base and exponent are in different registers.

2984 __ mov(edx, eax);	3102 __ mov(ecx, eax); // Back up exponent.

	3103 __ movsd(xmm4, xmm1); // Back up base.

	3104 __ movsd(xmm2, xmm3); // Load xmm2 with 1.

2985	3105

2986 // Get absolute value of exponent.	3106 // Get absolute value of exponent.

2987 Label no_neg;	3107 Label no_neg, while_true, no_multiply;

2988 __ cmp(eax, 0);	3108 __ cmp(eax, 0);

2989 __ j(greater_equal, &no_neg, Label::kNear);	3109 __ j(greater_equal, &no_neg, Label::kNear);

2990 __ neg(eax);	3110 __ neg(eax);

2991 __ bind(&no_neg);	3111 __ bind(&no_neg);

2992	3112

2993 // Load xmm1 with 1.

2994 __ movsd(xmm1, xmm3);

2995 Label while_true;

2996 Label no_multiply;

2997

2998 __ bind(&while_true);	3113 __ bind(&while_true);

2999 __ shr(eax, 1);	3114 __ shr(eax, 1);

3000 __ j(not_carry, &no_multiply, Label::kNear);	3115 __ j(not_carry, &no_multiply, Label::kNear);

3001 __ mulsd(xmm1, xmm0);	3116 __ mulsd(xmm3, xmm1);

3002 __ bind(&no_multiply);	3117 __ bind(&no_multiply);

3003 __ mulsd(xmm0, xmm0);	3118 __ mulsd(xmm1, xmm1);

3004 __ j(not_zero, &while_true);	3119 __ j(not_zero, &while_true);

3005	3120

3006 // base has the original value of the exponent - if the exponent is	3121 // base has the original value of the exponent - if the exponent is
	ulan 2011/12/01 18:11:26 base -> ecx in the comment
3007 // negative return 1/result.	3122 // negative return 1/result.

3008 __ test(edx, edx);	3123 __ test(ecx, ecx);

3009 __ j(positive, &allocate_return);	3124 __ j(positive, &done);

3010 // Special case if xmm1 has reached infinity.	3125 // Special case if xmm3 has reached infinity.

3011 __ mov(ecx, Immediate(0x7FB00000));	3126 __ mov(eax, Immediate(0x7F800000));

3012 __ movd(xmm0, ecx);	3127 __ movd(xmm1, eax);

3013 __ cvtss2sd(xmm0, xmm0);	3128 __ cvtss2sd(xmm1, xmm1);

3014 __ ucomisd(xmm0, xmm1);	3129 __ ucomisd(xmm1, xmm3);

3015 __ j(equal, &call_runtime);	3130 __ j(equal, &double_int_runtime);

3016 __ divsd(xmm3, xmm1);	3131 __ divsd(xmm2, xmm3);

3017 __ movsd(xmm1, xmm3);	3132 __ movsd(xmm3, xmm2);

3018 __ jmp(&allocate_return);	3133

3019	3134 // Returning or bailing out.

3020 // exponent (or both) is a heapnumber - no matter what we should now work	3135 if (exponent_type_ == ON_STACK) {

3021 // on doubles.	3136 // We expect the result as heap number in eax.

3022 __ bind(&exponent_nonsmi);	3137 __ bind(&done);

3023 __ cmp(FieldOperand(eax, HeapObject::kMapOffset),	3138 // xmm1: result
	ulan 2011/12/01 18:11:26 xmm1 -> xmm3 in the comment.
3024 factory->heap_number_map());	3139 __ AllocateHeapNumber(eax, ecx, edx, &generic_runtime);

3025 __ j(not_equal, &call_runtime);	3140 __ movdbl(FieldOperand(eax, HeapNumber::kValueOffset), xmm3);

3026 __ movdbl(xmm1, FieldOperand(eax, HeapNumber::kValueOffset));	3141 __ ret(2 * kPointerSize);

3027 // Test if exponent is nan.	3142

3028 __ ucomisd(xmm1, xmm1);	3143 // The arguments are still on the stack.

3029 __ j(parity_even, &call_runtime);	3144 __ bind(&generic_runtime);

3030	3145 __ bind(&double_int_runtime);

3031 Label base_not_smi;	3146 __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);

3032 Label handle_special_cases;	3147 } else {

3033 __ JumpIfNotSmi(edx, &base_not_smi, Label::kNear);	3148 __ jmp(&done);

3034 __ SmiUntag(edx);	3149

3035 __ cvtsi2sd(xmm0, edx);	3150 Label return_from_runtime;

3036 __ jmp(&handle_special_cases, Label::kNear);	3151 StubRuntimeCallHelper callhelper;

3037	3152 __ bind(&generic_runtime);

3038 __ bind(&base_not_smi);	3153 // xmm1: base

3039 __ cmp(FieldOperand(edx, HeapObject::kMapOffset),	3154 // xmm2: exponent

3040 factory->heap_number_map());	3155 {

3041 __ j(not_equal, &call_runtime);	3156 AllowExternalCallThatCantCauseGC scope(masm);

3042 __ mov(ecx, FieldOperand(edx, HeapNumber::kExponentOffset));	3157 __ PrepareCallCFunction(4, eax);

3043 __ and_(ecx, HeapNumber::kExponentMask);	3158 __ movdbl(Operand(esp, 0 * kDoubleSize), xmm1);

3044 __ cmp(ecx, Immediate(HeapNumber::kExponentMask));	3159 __ movdbl(Operand(esp, 1 * kDoubleSize), xmm2);

3045 // base is NaN or +/-Infinity	3160 __ CallCFunction(

3046 __ j(greater_equal, &call_runtime);	3161 ExternalReference::power_double_double_function(masm->isolate()), 4);

3047 __ movdbl(xmm0, FieldOperand(edx, HeapNumber::kValueOffset));	3162 }

3048	3163 __ jmp(&return_from_runtime, Label::kNear);

3049 // base is in xmm0 and exponent is in xmm1.	3164

3050 __ bind(&handle_special_cases);	3165 __ bind(&double_int_runtime);

3051 Label not_minus_half;	3166 // xmm4: base

3052 // Test for -0.5.	3167 // ecx: exponent

3053 // Load xmm2 with -0.5.	3168 {

3054 __ mov(ecx, Immediate(0xBF000000));	3169 __ PrepareCallCFunction(4, eax);

3055 __ movd(xmm2, ecx);	3170 __ movdbl(Operand(esp, 0 * kDoubleSize), xmm4);

3056 __ cvtss2sd(xmm2, xmm2);	3171 __ mov(Operand(esp, 1 * kDoubleSize), ecx);

3057 // xmm2 now has -0.5.	3172 AllowExternalCallThatCantCauseGC scope(masm);

3058 __ ucomisd(xmm2, xmm1);	3173 __ CallCFunction(

3059 __ j(not_equal, &not_minus_half, Label::kNear);	3174 ExternalReference::power_double_int_function(masm->isolate()), 4);

3060	3175 }

3061 // Calculates reciprocal of square root.	3176

3062 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.	3177 __ bind(&return_from_runtime);

3063 __ xorps(xmm1, xmm1);	3178 // Return value is in st(0) on ia32.

3064 __ addsd(xmm1, xmm0);	3179 // Store it into the (fixed) result register.

3065 __ sqrtsd(xmm1, xmm1);	3180 __ sub(esp, Immediate(kDoubleSize));

3066 __ divsd(xmm3, xmm1);	3181 __ fstp_d(Operand(esp, 0));

3067 __ movsd(xmm1, xmm3);	3182 __ movdbl(xmm3, Operand(esp, 0));

3068 __ jmp(&allocate_return);	3183 __ add(esp, Immediate(kDoubleSize));

3069	3184

3070 // Test for 0.5.	3185 // We expect the result in xmm3.

3071 __ bind(&not_minus_half);	3186 __ bind(&done);

3072 // Load xmm2 with 0.5.	3187 __ ret(0);

3073 // Since xmm3 is 1 and xmm2 is -0.5 this is simply xmm2 + xmm3.	3188 }

3074 __ addsd(xmm2, xmm3);

3075 // xmm2 now has 0.5.

3076 __ ucomisd(xmm2, xmm1);

3077 __ j(not_equal, &call_runtime);

3078 // Calculates square root.

3079 // sqrtsd returns -0 when input is -0. ECMA spec requires +0.

3080 __ xorps(xmm1, xmm1);

3081 __ addsd(xmm1, xmm0);

3082 __ sqrtsd(xmm1, xmm1);

3083

3084 __ bind(&allocate_return);

3085 __ AllocateHeapNumber(ecx, eax, edx, &call_runtime);

3086 __ movdbl(FieldOperand(ecx, HeapNumber::kValueOffset), xmm1);

3087 __ mov(eax, ecx);

3088 __ ret(2 * kPointerSize);

3089

3090 __ bind(&call_runtime);

3091 __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);

3092 }	3189 }

3093	3190

3094	3191

3095 void ArgumentsAccessStub::GenerateReadElement(MacroAssembler* masm) {	3192 void ArgumentsAccessStub::GenerateReadElement(MacroAssembler* masm) {

3096 // The key is in edx and the parameter count is in eax.	3193 // The key is in edx and the parameter count is in eax.

3097	3194

3098 // The displacement is used for skipping the frame pointer on the	3195 // The displacement is used for skipping the frame pointer on the

3099 // stack. It is the offset of the last parameter (if any) relative	3196 // stack. It is the offset of the last parameter (if any) relative

3100 // to the frame pointer.	3197 // to the frame pointer.

3101 static const int kDisplacement = 1 * kPointerSize;	3198 static const int kDisplacement = 1 * kPointerSize;

(...skipping 4069 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
7171 false);	7268 false);

7172 __ pop(edx);	7269 __ pop(edx);

7173 __ ret(0);	7270 __ ret(0);

7174 }	7271 }

7175	7272

7176 #undef __	7273 #undef __

7177	7274

7178 } } // namespace v8::internal	7275 } } // namespace v8::internal

7179	7276

7180 #endif // V8_TARGET_ARCH_IA32	7277 #endif // V8_TARGET_ARCH_IA32

OLD	NEW

« no previous file with comments | « src/ia32/assembler-ia32.cc ('k') | src/ia32/disasm-ia32.cc » ('j') | test/mjsunit/math-pow.js » ('J')