src/ia32/codegen-ia32.cc - Issue 652041: IA32: Native access to TranscendentalCache for sin/cos.

Side by Side Diff: src/ia32/codegen-ia32.cc

Issue 652041: IA32: Native access to TranscendentalCache for sin/cos. (Closed)

Patch Set: Updated to head. Removed dead code. Ignore first patch. Created 10 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 5807 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5818 ASSERT_EQ(args->length(), 1);	5818 ASSERT_EQ(args->length(), 1);

5819	5819

5820 // Load the argument on the stack and call the stub.	5820 // Load the argument on the stack and call the stub.

5821 Load(args->at(0));	5821 Load(args->at(0));

5822 NumberToStringStub stub;	5822 NumberToStringStub stub;

5823 Result result = frame_->CallStub(&stub, 1);	5823 Result result = frame_->CallStub(&stub, 1);

5824 frame_->Push(&result);	5824 frame_->Push(&result);

5825 }	5825 }

5826	5826

5827	5827

	5828 void CodeGenerator::GenerateMathSin(ZoneList<Expression> args) {

	5829 ASSERT_EQ(args->length(), 1);

	5830 Load(args->at(0));

	5831 TranscendentalCacheStub stub(TranscendentalCache::SIN);

	5832 Result result = frame_->CallStub(&stub, 1);

	5833 frame_->Push(&result);

	5834 }

	5835

	5836

	5837 void CodeGenerator::GenerateMathCos(ZoneList<Expression> args) {

	5838 ASSERT_EQ(args->length(), 1);

	5839 Load(args->at(0));

	5840 TranscendentalCacheStub stub(TranscendentalCache::COS);

	5841 Result result = frame_->CallStub(&stub, 1);

	5842 frame_->Push(&result);

	5843 }

	5844

	5845

5828 void CodeGenerator::VisitCallRuntime(CallRuntime* node) {	5846 void CodeGenerator::VisitCallRuntime(CallRuntime* node) {

5829 if (CheckForInlineRuntimeCall(node)) {	5847 if (CheckForInlineRuntimeCall(node)) {

5830 return;	5848 return;

5831 }	5849 }

5832	5850

5833 ZoneList<Expression> args = node->arguments();	5851 ZoneList<Expression> args = node->arguments();

5834 Comment cmnt(masm_, "[ CallRuntime");	5852 Comment cmnt(masm_, "[ CallRuntime");

5835 Runtime::Function* function = node->function();	5853 Runtime::Function* function = node->function();

5836	5854

5837 if (function == NULL) {	5855 if (function == NULL) {

(...skipping 2278 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
8116 // If arguments are not passed in registers remove them from the stack before	8134 // If arguments are not passed in registers remove them from the stack before

8117 // returning.	8135 // returning.

8118 if (!HasArgsInRegisters()) {	8136 if (!HasArgsInRegisters()) {

8119 __ ret(2 * kPointerSize); // Remove both operands	8137 __ ret(2 * kPointerSize); // Remove both operands

8120 } else {	8138 } else {

8121 __ ret(0);	8139 __ ret(0);

8122 }	8140 }

8123 }	8141 }

8124	8142

8125	8143

	8144 void TranscendentalCacheStub::Generate(MacroAssembler* masm) {

	8145 // Input on stack:

	8146 // esp[4]: argument (should be number).

	8147 // esp[0]: return address.

	8148 // Test that eax is a number.

	8149 Label runtime_call;

	8150 Label runtime_call_clear_stack;

	8151 Label input_not_smi;

	8152 Label loaded;

	8153 __ mov(eax, Operand(esp, kPointerSize));

	8154 __ test(eax, Immediate(kSmiTagMask));

	8155 __ j(not_zero, &input_not_smi);

	8156 // Input is a smi. Untag and load it onto the FPU stack.

	8157 // Then load the low and high words of the double into ebx, edx.

	8158 ASSERT_EQ(1, kSmiTagSize);

	8159 __ sar(eax, 1);

	8160 __ sub(Operand(esp), Immediate(2 * kPointerSize));

	8161 __ mov(Operand(esp, 0), eax);

	8162 __ fild_s(Operand(esp, 0));

	8163 __ fst_d(Operand(esp, 0));

	8164 __ pop(edx);

	8165 __ pop(ebx);

	8166 __ jmp(&loaded);

	8167 __ bind(&input_not_smi);

	8168 // Check if input is a HeapNumber.

	8169 __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));

	8170 __ cmp(Operand(ebx), Immediate(Factory::heap_number_map()));

	8171 __ j(not_equal, &runtime_call);

	8172 // Input is a HeapNumber. Push it on the FPU stack and load its

	8173 // low and high words into ebx, edx.

	8174 __ fld_d(FieldOperand(eax, HeapNumber::kValueOffset));
	fschneider 2010/02/22 17:42:54 Just an idea: Could you optimize the FPU push/pop away in the fast case (input is a heap number, cache hit)? Lasse Reichstein 2010/02/23 10:18:53 Probably. I would need a flag, or two different paths, to know whether I should load the double value later. I would also need to remember the pointer to the heap number (which I can otherwise drop after these loads). In any case, fld is a very cheap operation (for the CPU, it just offloads the load to the FPU and waits for it to complete there). I don't think it's worth complicating the code for.
	8175 __ mov(edx, FieldOperand(eax, HeapNumber::kExponentOffset));

	8176 __ mov(ebx, FieldOperand(eax, HeapNumber::kMantissaOffset));

	8177

	8178 __ bind(&loaded);

	8179 // ST[0] == double value

	8180 // ebx = low 32 bits of double value

	8181 // edx = high 32 bits of double value

	8182 // Compute hash:

	8183 // h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);

	8184 __ mov(ecx, ebx);

	8185 __ xor_(ecx, Operand(edx));

	8186 __ mov(eax, ecx);

	8187 __ sar(eax, 16);

	8188 __ xor_(ecx, Operand(eax));

	8189 __ mov(eax, ecx);

	8190 __ sar(eax, 8);

	8191 __ xor_(ecx, Operand(eax));

	8192 __ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));
	fschneider 2010/02/22 17:42:54 This assumes that kCacheSize is a power of two. I'd move the ASSERT from below up to here. Lasse Reichstein 2010/02/23 10:18:53 Well spotted. I moved this line up here but forgot the assert.
	8193 // ST[0] == double value.

	8194 // ebx = low 32 bits of double value.

	8195 // edx = high 32 bits of double value.

	8196 // ecx = TranscendentalCache::hash(double value).

	8197 ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));
	fschneider 2010/02/22 17:42:54 Move this ASSERT to above.
	8198 __ mov(eax,

	8199 Immediate(ExternalReference::transcendental_cache_array_address()));

	8200 // Eax points to cache array.

	8201 __ mov(eax, Operand(eax, type_ * sizeof(TranscendentalCache::caches_[0])));

	8202 // Eax points to the cache for the type type_.

	8203 // If NULL, the cache hasn't been initialized yet, so go through runtime.

	8204 __ test(eax, Operand(eax));

	8205 __ j(zero, &runtime_call_clear_stack);

	8206 #ifdef DEBUG

	8207 // Check that the layout of cache elements match expectations.

	8208 { // NOLINT - doesn't like a single brace on a line.

	8209 TranscendentalCache::Element test_elem[2];

	8210 char* elem_start = reinterpret_cast<char*>(&test_elem[0]);

	8211 char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);

	8212 char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));

	8213 char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));

	8214 char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));

	8215 CHECK_EQ(12, elem2_start - elem_start); // Two uint_32's and a pointer.

	8216 CHECK_EQ(0, elem_in0 - elem_start);

	8217 CHECK_EQ(kIntSize, elem_in1 - elem_start);

	8218 CHECK_EQ(2 * kIntSize, elem_out - elem_start);

	8219 }

	8220 #endif

	8221 // Find the address of the ecx'th entry in the cache, i.e., &eax[ecx*12].

	8222 __ lea(ecx, Operand(ecx, ecx, times_2, 0));

	8223 __ lea(ecx, Operand(eax, ecx, times_4, 0));

	8224 // Check if cache matches: Double value is stored in uint32_t[2] array.

	8225 Label cache_miss;

	8226 __ cmp(ebx, Operand(ecx, 0));

	8227 __ j(not_equal, &cache_miss);

	8228 __ cmp(edx, Operand(ecx, kIntSize)); // NOLINT
	fschneider 2010/02/22 17:42:54 Isn't this always half the size of a double (32 bits)? (even on x64) Lasse Reichstein 2010/02/23 10:18:53 It should be. The cache element holds two integers, which together should be all the bits of the double. The kIntSize here matches the layout test in the DEBUG section above. The NOLINT should go, in any case. It no longer reads sizeof(int32_t).
	8229 __ j(not_equal, &cache_miss);

	8230 // Cache hit!

	8231 __ mov(eax, Operand(ecx, 2 * kIntSize)); // NOLINT

	8232 __ fstp(0);
	fschneider 2010/02/22 17:42:54 Could this pop() of the FPU stack go away? (see my idea comment above) ebx and edx still contain the original double value, if I understand correctly? Lasse Reichstein 2010/02/23 10:18:53 I don't think it's worth it. In the smi case, I need to convert the smi to a double, so the fild does double service by both converting and pushing the value. The fstp operation is very quick as well (often just 1 cycle on modern chips).
	8233 __ ret(kPointerSize);

	8234

	8235 __ bind(&cache_miss);

	8236 // Update cache with new value.

	8237 // We are short on registers, so use no_reg as scratch.

	8238 // This gives slightly larger code.

	8239 __ AllocateHeapNumber(eax, edi, no_reg, &runtime_call_clear_stack);

	8240 GenerateOperation(masm);

	8241 __ mov(Operand(ecx, 0), ebx);

	8242 __ mov(Operand(ecx, sizeof(uint32_t)), edx); // NOLINT

	8243 __ mov(Operand(ecx, sizeof(uint32_t[2])), eax); // NOLINT

	8244 __ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));

	8245 __ ret(kPointerSize);

	8246

	8247 __ bind(&runtime_call_clear_stack);

	8248 __ fstp(0);

	8249 __ bind(&runtime_call);

	8250 __ TailCallRuntime(ExternalReference(RuntimeFunction()), 1, 1);

	8251 }

	8252

	8253

	8254 Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() {

	8255 switch (type_) {

	8256 // Add more cases when necessary.

	8257 case TranscendentalCache::SIN: return Runtime::kMath_sin;

	8258 case TranscendentalCache::COS: return Runtime::kMath_cos;

	8259 default:

	8260 UNIMPLEMENTED();

	8261 return Runtime::kAbort;

	8262 }

	8263 }

	8264

	8265

	8266 void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {

	8267 // Only free register is edi.

	8268 Label done;

	8269 switch (type_) {

	8270 case TranscendentalCache::SIN:

	8271 case TranscendentalCache::COS: {
	fschneider 2010/02/22 17:42:54 Are there potentially more types of transcendental caches? switch(type_) {...} does not seem necessary here. Maybe just ASSERT(type_ == TranscendentalCache::SIN || type_ == TranscendentalCache::COS) so that we don't pay extra in release mode. Lasse Reichstein 2010/02/23 10:18:53 There are potentially more, some of which won't need the same setup as the 2PI periodic ones, but there's no need for the generality yet. I'll make it into an ASSERT and make a comment about future extensions.
	8272 // Both fsin and fcos require arguments in the range +/-2^63 and

	8273 // return NaN for infinities and NaN. They can share all code except

	8274 // the actual fsin/fcos operation.

	8275 Label in_range;

	8276 // If argument is outside the range -2^63..2^63, fsin/cos doesn't

	8277 // work. We must reduce it to the appropriate range.

	8278 __ mov(edi, edx);

	8279 __ and_(Operand(edi), Immediate(0x7ff00000)); // Exponent only.

	8280 int supported_exponent_limit =

	8281 (63 + HeapNumber::kExponentBias) << HeapNumber::kExponentShift;

	8282 __ cmp(Operand(edi), Immediate(supported_exponent_limit));

	8283 __ j(below, &in_range, taken);

	8284 // Check for infinity and NaN. Both return NaN for sin.

	8285 __ cmp(Operand(edi), Immediate(0x7ff00000));

	8286 Label non_nan_result;

	8287 __ j(not_equal, &non_nan_result, taken);

	8288 // Input is +/-Infinity or NaN. Result is NaN.

	8289 __ fstp(0);

	8290 // NaN is represented by 0x7ff8000000000000.

	8291 __ push(Immediate(0x7ff80000));

	8292 __ push(Immediate(0));

	8293 __ fld_d(Operand(esp, 0));

	8294 __ add(Operand(esp), Immediate(2 * kPointerSize));

	8295 __ jmp(&done);

	8296

	8297 __ bind(&non_nan_result);

	8298

	8299 // Use fpmod to restrict argument to the range +/-2*PI.

	8300 __ mov(edi, eax); // Save eax before using fnstsw_ax.

	8301 __ fldpi();

	8302 __ fadd(0);

	8303 __ fld(1);

	8304 // FPU Stack: input, 2*pi, input.

	8305 {

	8306 Label no_exceptions;

	8307 __ fwait();

	8308 __ fnstsw_ax();

	8309 // Clear if Illegal Operand or Zero Division exceptions are set.

	8310 __ test(Operand(eax), Immediate(5));

	8311 __ j(zero, &no_exceptions);

	8312 __ fnclex();

	8313 __ bind(&no_exceptions);

	8314 }

	8315

	8316 // Compute st(0) % st(1)

	8317 {

	8318 Label partial_remainder_loop;

	8319 __ bind(&partial_remainder_loop);

	8320 __ fprem();
	fschneider 2010/02/22 17:42:54 Is there a reason for not using fprem1()? Lasse Reichstein 2010/02/23 10:18:53 It is slightly slower on some chips, but it also gives a slightly more precise result. I guess the tradeoff should go in the direction of precision in this case. I'll change it to fprem1.
	8321 __ fwait();

	8322 __ fnstsw_ax();

	8323 __ test(Operand(eax), Immediate(0x400 /* C2 */));

	8324 // If C2 is set, computation only has partial result. Loop to

	8325 // continue computation.

	8326 __ j(not_zero, &partial_remainder_loop);

	8327 }

	8328 // FPU Stack: input, 2pi, input % 2pi

	8329 __ fstp(2);

	8330 __ fstp(0);

	8331 __ mov(eax, edi); // Restore eax (allocated HeapNumber pointer).

	8332

	8333 // FPU Stack: input % 2*pi

	8334 __ bind(&in_range);

	8335 switch (type_) {

	8336 case TranscendentalCache::SIN:

	8337 __ fsin();

	8338 break;

	8339 case TranscendentalCache::COS:

	8340 __ fcos();

	8341 break;

	8342 default:

	8343 UNREACHABLE();

	8344 }

	8345 break;

	8346 }

	8347 default:

	8348 UNIMPLEMENTED();

	8349 }

	8350 __ bind(&done);

	8351 }

	8352

	8353

8126 // Get the integer part of a heap number. Surprisingly, all this bit twiddling	8354 // Get the integer part of a heap number. Surprisingly, all this bit twiddling

8127 // is faster than using the built-in instructions on floating point registers.	8355 // is faster than using the built-in instructions on floating point registers.

8128 // Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the	8356 // Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the

8129 // trashed registers.	8357 // trashed registers.

8130 void IntegerConvert(MacroAssembler* masm,	8358 void IntegerConvert(MacroAssembler* masm,

8131 Register source,	8359 Register source,

8132 bool use_sse3,	8360 bool use_sse3,

8133 Label* conversion_failure) {	8361 Label* conversion_failure) {

8134 ASSERT(!source.is(ecx) && !source.is(edi) && !source.is(ebx));	8362 ASSERT(!source.is(ecx) && !source.is(edi) && !source.is(ebx));

8135 Label done, right_exponent, normal_exponent;	8363 Label done, right_exponent, normal_exponent;

(...skipping 2657 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
10793	11021

10794 // Call the runtime; it returns -1 (less), 0 (equal), or 1 (greater)	11022 // Call the runtime; it returns -1 (less), 0 (equal), or 1 (greater)

10795 // tagged as a small integer.	11023 // tagged as a small integer.

10796 __ bind(&runtime);	11024 __ bind(&runtime);

10797 __ TailCallRuntime(ExternalReference(Runtime::kStringCompare), 2, 1);	11025 __ TailCallRuntime(ExternalReference(Runtime::kStringCompare), 2, 1);

10798 }	11026 }

10799	11027

10800 #undef __	11028 #undef __

10801	11029

10802 } } // namespace v8::internal	11030 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/ia32/codegen-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | src/ia32/disasm-ia32.cc » ('J')