Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(576)

Side by Side Diff: src/ia32/codegen-ia32.cc

Issue 652041: IA32: Native access to TranscendentalCache for sin/cos. (Closed)
Patch Set: Updated to head. Removed dead code. Ignore first patch. Created 10 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 5807 matching lines...) Expand 10 before | Expand all | Expand 10 after
5818 ASSERT_EQ(args->length(), 1); 5818 ASSERT_EQ(args->length(), 1);
5819 5819
5820 // Load the argument on the stack and call the stub. 5820 // Load the argument on the stack and call the stub.
5821 Load(args->at(0)); 5821 Load(args->at(0));
5822 NumberToStringStub stub; 5822 NumberToStringStub stub;
5823 Result result = frame_->CallStub(&stub, 1); 5823 Result result = frame_->CallStub(&stub, 1);
5824 frame_->Push(&result); 5824 frame_->Push(&result);
5825 } 5825 }
5826 5826
5827 5827
// Emits the code for the sine operation: asserts that exactly one argument
// was supplied, loads it, calls the TranscendentalCacheStub configured for
// TranscendentalCache::SIN, and pushes the result of the stub onto frame_.
5828 void CodeGenerator::GenerateMathSin(ZoneList<Expression*>* args) {
5829 ASSERT_EQ(args->length(), 1);
// Load the argument and call the stub (the 1 passed to CallStub is presumably
// the argument count - confirm against VirtualFrame::CallStub).
5830 Load(args->at(0));
5831 TranscendentalCacheStub stub(TranscendentalCache::SIN);
5832 Result result = frame_->CallStub(&stub, 1);
5833 frame_->Push(&result);
5834 }
5835
5836
// Emits the code for the cosine operation: asserts that exactly one argument
// was supplied, loads it, calls the TranscendentalCacheStub configured for
// TranscendentalCache::COS, and pushes the result of the stub onto frame_.
5837 void CodeGenerator::GenerateMathCos(ZoneList<Expression*>* args) {
5838 ASSERT_EQ(args->length(), 1);
// Load the argument and call the stub (the 1 passed to CallStub is presumably
// the argument count - confirm against VirtualFrame::CallStub).
5839 Load(args->at(0));
5840 TranscendentalCacheStub stub(TranscendentalCache::COS);
5841 Result result = frame_->CallStub(&stub, 1);
5842 frame_->Push(&result);
5843 }
5844
5845
5828 void CodeGenerator::VisitCallRuntime(CallRuntime* node) { 5846 void CodeGenerator::VisitCallRuntime(CallRuntime* node) {
5829 if (CheckForInlineRuntimeCall(node)) { 5847 if (CheckForInlineRuntimeCall(node)) {
5830 return; 5848 return;
5831 } 5849 }
5832 5850
5833 ZoneList<Expression*>* args = node->arguments(); 5851 ZoneList<Expression*>* args = node->arguments();
5834 Comment cmnt(masm_, "[ CallRuntime"); 5852 Comment cmnt(masm_, "[ CallRuntime");
5835 Runtime::Function* function = node->function(); 5853 Runtime::Function* function = node->function();
5836 5854
5837 if (function == NULL) { 5855 if (function == NULL) {
(...skipping 2278 matching lines...) Expand 10 before | Expand all | Expand 10 after
8116 // If arguments are not passed in registers remove them from the stack before 8134 // If arguments are not passed in registers remove them from the stack before
8117 // returning. 8135 // returning.
8118 if (!HasArgsInRegisters()) { 8136 if (!HasArgsInRegisters()) {
8119 __ ret(2 * kPointerSize); // Remove both operands 8137 __ ret(2 * kPointerSize); // Remove both operands
8120 } else { 8138 } else {
8121 __ ret(0); 8139 __ ret(0);
8122 } 8140 }
8123 } 8141 }
8124 8142
8125 8143
// Emits the ia32 stub that computes the transcendental function selected by
// type_, using a lookup in the TranscendentalCache for that type. The argument
// is read from the stack. Smis and HeapNumbers are handled inline; any other
// input, an uninitialized cache, or a bailout from AllocateHeapNumber
// tail-calls the runtime function returned by RuntimeFunction(). On the inline
// paths the result is returned in eax and ret(kPointerSize) removes the
// argument from the stack.
8144 void TranscendentalCacheStub::Generate(MacroAssembler* masm) {
8145 // Input on stack:
8146 // esp[4]: argument (should be number).
8147 // esp[0]: return address.
8148 // Load the argument into eax and test that it is a number (smi or HeapNumber).
8149 Label runtime_call;
8150 Label runtime_call_clear_stack;
8151 Label input_not_smi;
8152 Label loaded;
8153 __ mov(eax, Operand(esp, kPointerSize));
8154 __ test(eax, Immediate(kSmiTagMask));
8155 __ j(not_zero, &input_not_smi);
8156 // Input is a smi. Untag and load it onto the FPU stack.
8157 // Then load the low and high words of the double into ebx, edx.
8158 ASSERT_EQ(1, kSmiTagSize);
8159 __ sar(eax, 1);
8160 __ sub(Operand(esp), Immediate(2 * kPointerSize));
8161 __ mov(Operand(esp, 0), eax);
8162 __ fild_s(Operand(esp, 0));
8163 __ fst_d(Operand(esp, 0));
// NOTE(review): fst_d writes the low word at esp[0] and the high word at
// esp[4] on little-endian ia32, so these two pops appear to leave the low word
// in edx and the high word in ebx - the opposite of the HeapNumber path below
// and of the register comments after the loaded label. A swapped key could
// also compare equal to an unrelated HeapNumber entry. Confirm, and swap the
// pops if so.
8164 __ pop(edx);
8165 __ pop(ebx);
8166 __ jmp(&loaded);
8167 __ bind(&input_not_smi);
8168 // Check if input is a HeapNumber.
8169 __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset));
8170 __ cmp(Operand(ebx), Immediate(Factory::heap_number_map()));
8171 __ j(not_equal, &runtime_call);
8172 // Input is a HeapNumber. Push it on the FPU stack and load its
8173 // low and high words into ebx, edx.
8174 __ fld_d(FieldOperand(eax, HeapNumber::kValueOffset));
fschneider 2010/02/22 17:42:54 Just an idea: Could you optimize the FPU push/pop
Lasse Reichstein 2010/02/23 10:18:53 Probably. I would need a flag, or two different pa
8175 __ mov(edx, FieldOperand(eax, HeapNumber::kExponentOffset));
8176 __ mov(ebx, FieldOperand(eax, HeapNumber::kMantissaOffset));
8177
8178 __ bind(&loaded);
8179 // ST[0] == double value
8180 // ebx = low 32 bits of double value
8181 // edx = high 32 bits of double value
8182 // Compute hash:
8183 // h = (low ^ high); h ^= h >> 16; h ^= h >> 8; h = h & (cacheSize - 1);
8184 __ mov(ecx, ebx);
8185 __ xor_(ecx, Operand(edx));
8186 __ mov(eax, ecx);
8187 __ sar(eax, 16);
8188 __ xor_(ecx, Operand(eax));
8189 __ mov(eax, ecx);
8190 __ sar(eax, 8);
8191 __ xor_(ecx, Operand(eax));
8192 __ and_(Operand(ecx), Immediate(TranscendentalCache::kCacheSize - 1));
fschneider 2010/02/22 17:42:54 This assumes that kCacheSize is a power of two. I'
Lasse Reichstein 2010/02/23 10:18:53 Well spotted. I moved this line up here but forgot
8193 // ST[0] == double value.
8194 // ebx = low 32 bits of double value.
8195 // edx = high 32 bits of double value.
8196 // ecx = TranscendentalCache::hash(double value).
8197 ASSERT(IsPowerOf2(TranscendentalCache::kCacheSize));
fschneider 2010/02/22 17:42:54 Move this ASSERT to above.
8198 __ mov(eax,
8199 Immediate(ExternalReference::transcendental_cache_array_address()));
8200 // Eax points to cache array.
8201 __ mov(eax, Operand(eax, type_ * sizeof(TranscendentalCache::caches_[0])));
8202 // Eax points to the cache for the type type_.
8203 // If NULL, the cache hasn't been initialized yet, so go through runtime.
8204 __ test(eax, Operand(eax));
8205 __ j(zero, &runtime_call_clear_stack);
8206 #ifdef DEBUG
8207 // Check that the layout of cache elements match expectations.
8208 { // NOLINT - doesn't like a single brace on a line.
8209 TranscendentalCache::Element test_elem[2];
8210 char* elem_start = reinterpret_cast<char*>(&test_elem[0]);
8211 char* elem2_start = reinterpret_cast<char*>(&test_elem[1]);
8212 char* elem_in0 = reinterpret_cast<char*>(&(test_elem[0].in[0]));
8213 char* elem_in1 = reinterpret_cast<char*>(&(test_elem[0].in[1]));
8214 char* elem_out = reinterpret_cast<char*>(&(test_elem[0].output));
8215 CHECK_EQ(12, elem2_start - elem_start); // Two uint_32's and a pointer.
8216 CHECK_EQ(0, elem_in0 - elem_start);
8217 CHECK_EQ(kIntSize, elem_in1 - elem_start);
8218 CHECK_EQ(2 * kIntSize, elem_out - elem_start);
8219 }
8220 #endif
8221 // Find the address of the ecx'th entry in the cache, i.e., &eax[ecx*12].
// First lea: ecx = ecx + ecx * 2 = 3 * index.
// Second lea: ecx = eax + (3 * index) * 4 = eax + 12 * index.
8222 __ lea(ecx, Operand(ecx, ecx, times_2, 0));
8223 __ lea(ecx, Operand(eax, ecx, times_4, 0));
8224 // Check if cache matches: Double value is stored in uint32_t[2] array.
8225 Label cache_miss;
8226 __ cmp(ebx, Operand(ecx, 0));
8227 __ j(not_equal, &cache_miss);
8228 __ cmp(edx, Operand(ecx, kIntSize)); // NOLINT
fschneider 2010/02/22 17:42:54 Isn't this always half the size of a double (32 bi
Lasse Reichstein 2010/02/23 10:18:53 It should be. The cache element holds two integers
8229 __ j(not_equal, &cache_miss);
8230 // Cache hit! Return the cached output pointer in eax, drop the input from the FPU stack and pop the argument on return.
8231 __ mov(eax, Operand(ecx, 2 * kIntSize)); // NOLINT
8232 __ fstp(0);
fschneider 2010/02/22 17:42:54 Could this pop() of the FPU stack go away? (see my
Lasse Reichstein 2010/02/23 10:18:53 I don't think it's worth it. In the smi case, I ne
8233 __ ret(kPointerSize);
8234
8235 __ bind(&cache_miss);
8236 // Update cache with new value.
8237 // We are short on registers, so use no_reg as scratch.
8238 // This gives slightly larger code.
8239 __ AllocateHeapNumber(eax, edi, no_reg, &runtime_call_clear_stack);
// Compute the result in ST(0); eax keeps the freshly allocated HeapNumber.
8240 GenerateOperation(masm);
// Record the key words held in ebx and edx, plus the result object, in the
// cache entry addressed by ecx.
8241 __ mov(Operand(ecx, 0), ebx);
8242 __ mov(Operand(ecx, sizeof(uint32_t)), edx); // NOLINT
8243 __ mov(Operand(ecx, sizeof(uint32_t[2])), eax); // NOLINT
// Store the computed double into the new HeapNumber and pop it off the FPU
// stack, then return with the HeapNumber in eax.
8244 __ fstp_d(FieldOperand(eax, HeapNumber::kValueOffset));
8245 __ ret(kPointerSize);
8246
// Bailout taken after the input was pushed on the FPU stack: drop it first.
8247 __ bind(&runtime_call_clear_stack);
8248 __ fstp(0);
// Bailout for inputs that are neither a smi nor a HeapNumber (nothing was
// pushed on the FPU stack on that path).
8249 __ bind(&runtime_call);
8250 __ TailCallRuntime(ExternalReference(RuntimeFunction()), 1, 1);
8251 }
8252
8253
// Returns the id of the runtime function that implements the operation
// selected by type_. Generate() tail-calls this function on its bailout
// paths. Only SIN and COS are mapped; any other type hits UNIMPLEMENTED()
// and yields Runtime::kAbort.
8254 Runtime::FunctionId TranscendentalCacheStub::RuntimeFunction() {
8255 switch (type_) {
8256 // Add more cases when necessary.
8257 case TranscendentalCache::SIN: return Runtime::kMath_sin;
8258 case TranscendentalCache::COS: return Runtime::kMath_cos;
8259 default:
8260 UNIMPLEMENTED();
8261 return Runtime::kAbort;
8262 }
8263 }
8264
8265
// Emits the FPU code that replaces the input value in ST(0) with the result
// of the operation selected by type_ (fsin or fcos). Reads the high 32 bits
// of the input from edx for the exponent test, uses edi as scratch, and
// saves and restores eax around the sequences that execute fnstsw_ax.
// Types other than SIN and COS hit UNIMPLEMENTED().
8266 void TranscendentalCacheStub::GenerateOperation(MacroAssembler* masm) {
8267 // Only free register is edi.
8268 Label done;
8269 switch (type_) {
8270 case TranscendentalCache::SIN:
8271 case TranscendentalCache::COS: {
fschneider 2010/02/22 17:42:54 Are there potentially more types of transcendental
Lasse Reichstein 2010/02/23 10:18:53 There are potentially more, some of which won't ne
8272 // Both fsin and fcos require arguments in the range +/-2^63 and
8273 // return NaN for infinities and NaN. They can share all code except
8274 // the actual fsin/fcos operation.
8275 Label in_range;
8276 // If argument is outside the range -2^63..2^63, fsin/cos doesn't
8277 // work. We must reduce it to the appropriate range.
8278 __ mov(edi, edx);
8279 __ and_(Operand(edi), Immediate(0x7ff00000)); // Exponent only.
8280 int supported_exponent_limit =
8281 (63 + HeapNumber::kExponentBias) << HeapNumber::kExponentShift;
8282 __ cmp(Operand(edi), Immediate(supported_exponent_limit));
8283 __ j(below, &in_range, taken);
8284 // Check for infinity and NaN (all exponent bits set). Both give a NaN result for sin and cos.
8285 __ cmp(Operand(edi), Immediate(0x7ff00000));
8286 Label non_nan_result;
8287 __ j(not_equal, &non_nan_result, taken);
8288 // Input is +/-Infinity or NaN. Result is NaN.
8289 __ fstp(0);
8290 // NaN is represented by 0x7ff8000000000000.
// Push the high word first so that the low word ends up at esp[0], load the
// 64-bit pattern onto the FPU stack, then release the two stack slots.
8291 __ push(Immediate(0x7ff80000));
8292 __ push(Immediate(0));
8293 __ fld_d(Operand(esp, 0));
8294 __ add(Operand(esp), Immediate(2 * kPointerSize));
8295 __ jmp(&done);
8296
8297 __ bind(&non_nan_result);
8298
8299 // Use fprem to restrict argument to the range +/-2*PI.
8300 __ mov(edi, eax); // Save eax before using fnstsw_ax.
// Build 2*pi by loading pi and adding ST(0) to itself, then duplicate the
// input on top of it.
8301 __ fldpi();
8302 __ fadd(0);
8303 __ fld(1);
8304 // FPU Stack: input, 2*pi, input.
8305 {
8306 Label no_exceptions;
8307 __ fwait();
8308 __ fnstsw_ax();
8309 // Clear pending exceptions if the Invalid Operation (bit 0) or Zero Divide (bit 2) status flag is set; mask 5 selects both.
8310 __ test(Operand(eax), Immediate(5));
8311 __ j(zero, &no_exceptions);
8312 __ fnclex();
8313 __ bind(&no_exceptions);
8314 }
8315
8316 // Compute st(0) % st(1)
8317 {
8318 Label partial_remainder_loop;
8319 __ bind(&partial_remainder_loop);
8320 __ fprem();
fschneider 2010/02/22 17:42:54 Is there a reason for not using fprem1()?
Lasse Reichstein 2010/02/23 10:18:53 It is slightly slower on some chips, but it also g
8321 __ fwait();
8322 __ fnstsw_ax();
8323 __ test(Operand(eax), Immediate(0x400 /* C2 */));
8324 // If C2 is set, computation only has partial result. Loop to
8325 // continue computation.
8326 __ j(not_zero, &partial_remainder_loop);
8327 }
8328 // FPU Stack: input, 2*pi, input % 2*pi
// fstp(2) stores the remainder over the original input and pops it;
// fstp(0) then pops 2*pi, leaving only the remainder on the FPU stack.
8329 __ fstp(2);
8330 __ fstp(0);
8331 __ mov(eax, edi); // Restore eax (allocated HeapNumber pointer).
8332
8333 // FPU Stack: input % 2*pi (or the unmodified input when it was already in range).
8334 __ bind(&in_range);
8335 switch (type_) {
8336 case TranscendentalCache::SIN:
8337 __ fsin();
8338 break;
8339 case TranscendentalCache::COS:
8340 __ fcos();
8341 break;
8342 default:
8343 UNREACHABLE();
8344 }
8345 break;
8346 }
8347 default:
8348 UNIMPLEMENTED();
8349 }
8350 __ bind(&done);
8351 }
8352
8353
8126 // Get the integer part of a heap number. Surprisingly, all this bit twiddling 8354 // Get the integer part of a heap number. Surprisingly, all this bit twiddling
8127 // is faster than using the built-in instructions on floating point registers. 8355 // is faster than using the built-in instructions on floating point registers.
8128 // Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the 8356 // Trashes edi and ebx. Dest is ecx. Source cannot be ecx or one of the
8129 // trashed registers. 8357 // trashed registers.
8130 void IntegerConvert(MacroAssembler* masm, 8358 void IntegerConvert(MacroAssembler* masm,
8131 Register source, 8359 Register source,
8132 bool use_sse3, 8360 bool use_sse3,
8133 Label* conversion_failure) { 8361 Label* conversion_failure) {
8134 ASSERT(!source.is(ecx) && !source.is(edi) && !source.is(ebx)); 8362 ASSERT(!source.is(ecx) && !source.is(edi) && !source.is(ebx));
8135 Label done, right_exponent, normal_exponent; 8363 Label done, right_exponent, normal_exponent;
(...skipping 2657 matching lines...) Expand 10 before | Expand all | Expand 10 after
10793 11021
10794 // Call the runtime; it returns -1 (less), 0 (equal), or 1 (greater) 11022 // Call the runtime; it returns -1 (less), 0 (equal), or 1 (greater)
10795 // tagged as a small integer. 11023 // tagged as a small integer.
10796 __ bind(&runtime); 11024 __ bind(&runtime);
10797 __ TailCallRuntime(ExternalReference(Runtime::kStringCompare), 2, 1); 11025 __ TailCallRuntime(ExternalReference(Runtime::kStringCompare), 2, 1);
10798 } 11026 }
10799 11027
10800 #undef __ 11028 #undef __
10801 11029
10802 } } // namespace v8::internal 11030 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/ia32/codegen-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | src/ia32/disasm-ia32.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698