Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(275)

Side by Side Diff: runtime/vm/intermediate_language_x64.cc

Issue 15085006: Inline Uint32x4 operations (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_X64. 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_X64.
6 #if defined(TARGET_ARCH_X64) 6 #if defined(TARGET_ARCH_X64)
7 7
8 #include "vm/intermediate_language.h" 8 #include "vm/intermediate_language.h"
9 9
10 #include "lib/error.h" 10 #include "lib/error.h"
(...skipping 2987 matching lines...) Expand 10 before | Expand all | Expand 10 after
2998 2998
2999 2999
3000 void Float32x4ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 3000 void Float32x4ConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3001 XmmRegister v0 = locs()->in(0).fpu_reg(); 3001 XmmRegister v0 = locs()->in(0).fpu_reg();
3002 XmmRegister v1 = locs()->in(1).fpu_reg(); 3002 XmmRegister v1 = locs()->in(1).fpu_reg();
3003 XmmRegister v2 = locs()->in(2).fpu_reg(); 3003 XmmRegister v2 = locs()->in(2).fpu_reg();
3004 XmmRegister v3 = locs()->in(3).fpu_reg(); 3004 XmmRegister v3 = locs()->in(3).fpu_reg();
3005 ASSERT(v0 == locs()->out().fpu_reg()); 3005 ASSERT(v0 == locs()->out().fpu_reg());
3006 __ subq(RSP, Immediate(16)); 3006 __ subq(RSP, Immediate(16));
3007 __ cvtsd2ss(v0, v0); 3007 __ cvtsd2ss(v0, v0);
3008 __ movss(Address(RSP, -16), v0); 3008 __ movss(Address(RSP, 0), v0);
3009 __ movsd(v0, v1); 3009 __ movsd(v0, v1);
3010 __ cvtsd2ss(v0, v0); 3010 __ cvtsd2ss(v0, v0);
3011 __ movss(Address(RSP, -12), v0); 3011 __ movss(Address(RSP, 4), v0);
3012 __ movsd(v0, v2); 3012 __ movsd(v0, v2);
3013 __ cvtsd2ss(v0, v0); 3013 __ cvtsd2ss(v0, v0);
3014 __ movss(Address(RSP, -8), v0); 3014 __ movss(Address(RSP, 8), v0);
3015 __ movsd(v0, v3); 3015 __ movsd(v0, v3);
3016 __ cvtsd2ss(v0, v0); 3016 __ cvtsd2ss(v0, v0);
3017 __ movss(Address(RSP, -4), v0); 3017 __ movss(Address(RSP, 12), v0);
3018 __ movups(v0, Address(RSP, -16)); 3018 __ movups(v0, Address(RSP, 0));
3019 __ addq(RSP, Immediate(16)); 3019 __ addq(RSP, Immediate(16));
3020 } 3020 }
3021 3021
3022 3022
3023 LocationSummary* Float32x4ZeroInstr::MakeLocationSummary() const { 3023 LocationSummary* Float32x4ZeroInstr::MakeLocationSummary() const {
3024 const intptr_t kNumInputs = 0; 3024 const intptr_t kNumInputs = 0;
3025 const intptr_t kNumTemps = 0; 3025 const intptr_t kNumTemps = 0;
3026 LocationSummary* summary = 3026 LocationSummary* summary =
3027 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3027 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3028 summary->set_out(Location::RequiresFpuRegister()); 3028 summary->set_out(Location::RequiresFpuRegister());
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
3244 const intptr_t kNumInputs = 2; 3244 const intptr_t kNumInputs = 2;
3245 const intptr_t kNumTemps = 0; 3245 const intptr_t kNumTemps = 0;
3246 LocationSummary* summary = 3246 LocationSummary* summary =
3247 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3247 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3248 summary->set_in(0, Location::RequiresFpuRegister()); 3248 summary->set_in(0, Location::RequiresFpuRegister());
3249 summary->set_in(1, Location::RequiresFpuRegister()); 3249 summary->set_in(1, Location::RequiresFpuRegister());
3250 summary->set_out(Location::SameAsFirstInput()); 3250 summary->set_out(Location::SameAsFirstInput());
3251 return summary; 3251 return summary;
3252 } 3252 }
3253 3253
3254
3254 void Float32x4WithInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 3255 void Float32x4WithInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3255 XmmRegister replacement = locs()->in(0).fpu_reg(); 3256 XmmRegister replacement = locs()->in(0).fpu_reg();
3256 XmmRegister value = locs()->in(1).fpu_reg(); 3257 XmmRegister value = locs()->in(1).fpu_reg();
3257 3258
3258 ASSERT(locs()->out().fpu_reg() == replacement); 3259 ASSERT(locs()->out().fpu_reg() == replacement);
3259 3260
3260 switch (op_kind()) { 3261 switch (op_kind()) {
3261 case MethodRecognizer::kFloat32x4WithX: 3262 case MethodRecognizer::kFloat32x4WithX:
3262 __ cvtsd2ss(replacement, replacement); 3263 __ cvtsd2ss(replacement, replacement);
3263 __ subq(RSP, Immediate(16)); 3264 __ subq(RSP, Immediate(16));
3264 // Move value to stack. 3265 // Move value to stack.
3265 __ movups(Address(RSP, -16), value); 3266 __ movups(Address(RSP, 0), value);
3266 // Write over X value. 3267 // Write over X value.
3267 __ movss(Address(RSP, -16), replacement); 3268 __ movss(Address(RSP, 0), replacement);
3268 // Move updated value into output register. 3269 // Move updated value into output register.
3269 __ movups(replacement, Address(RSP, -16)); 3270 __ movups(replacement, Address(RSP, 0));
3270 __ addq(RSP, Immediate(16)); 3271 __ addq(RSP, Immediate(16));
3271 break; 3272 break;
3272 case MethodRecognizer::kFloat32x4WithY: 3273 case MethodRecognizer::kFloat32x4WithY:
3273 __ cvtsd2ss(replacement, replacement); 3274 __ cvtsd2ss(replacement, replacement);
3274 __ subq(RSP, Immediate(16)); 3275 __ subq(RSP, Immediate(16));
3275 // Move value to stack. 3276 // Move value to stack.
3276 __ movups(Address(RSP, -16), value); 3277 __ movups(Address(RSP, 0), value);
3277 // Write over Y value. 3278 // Write over Y value.
3278 __ movss(Address(RSP, -12), replacement); 3279 __ movss(Address(RSP, 4), replacement);
3279 // Move updated value into output register. 3280 // Move updated value into output register.
3280 __ movups(replacement, Address(RSP, -16)); 3281 __ movups(replacement, Address(RSP, 0));
3281 __ addq(RSP, Immediate(16)); 3282 __ addq(RSP, Immediate(16));
3282 break; 3283 break;
3283 case MethodRecognizer::kFloat32x4WithZ: 3284 case MethodRecognizer::kFloat32x4WithZ:
3284 __ cvtsd2ss(replacement, replacement); 3285 __ cvtsd2ss(replacement, replacement);
3285 __ subq(RSP, Immediate(16)); 3286 __ subq(RSP, Immediate(16));
3286 // Move value to stack. 3287 // Move value to stack.
3287 __ movups(Address(RSP, -16), value); 3288 __ movups(Address(RSP, 0), value);
3288 // Write over Z value. 3289 // Write over Z value.
3289 __ movss(Address(RSP, -8), replacement); 3290 __ movss(Address(RSP, 8), replacement);
3290 // Move updated value into output register. 3291 // Move updated value into output register.
3291 __ movups(replacement, Address(RSP, -16)); 3292 __ movups(replacement, Address(RSP, 0));
3292 __ addq(RSP, Immediate(16)); 3293 __ addq(RSP, Immediate(16));
3293 break; 3294 break;
3294 case MethodRecognizer::kFloat32x4WithW: 3295 case MethodRecognizer::kFloat32x4WithW:
3295 __ cvtsd2ss(replacement, replacement); 3296 __ cvtsd2ss(replacement, replacement);
3296 __ subq(RSP, Immediate(16)); 3297 __ subq(RSP, Immediate(16));
3297 // Move value to stack. 3298 // Move value to stack.
3298 __ movups(Address(RSP, -16), value); 3299 __ movups(Address(RSP, 0), value);
3299 // Write over W value. 3300 // Write over W value.
3300 __ movss(Address(RSP, -4), replacement); 3301 __ movss(Address(RSP, 12), replacement);
3301 // Move updated value into output register. 3302 // Move updated value into output register.
3302 __ movups(replacement, Address(RSP, -16)); 3303 __ movups(replacement, Address(RSP, 0));
3303 __ addq(RSP, Immediate(16)); 3304 __ addq(RSP, Immediate(16));
3304 break; 3305 break;
3305 default: UNREACHABLE(); 3306 default: UNREACHABLE();
3306 } 3307 }
3307 } 3308 }
3308 3309
3309 3310
3310 LocationSummary* Float32x4ToUint32x4Instr::MakeLocationSummary() const { 3311 LocationSummary* Float32x4ToUint32x4Instr::MakeLocationSummary() const {
3311 const intptr_t kNumInputs = 1; 3312 const intptr_t kNumInputs = 1;
3312 const intptr_t kNumTemps = 0; 3313 const intptr_t kNumTemps = 0;
3313 LocationSummary* summary = 3314 LocationSummary* summary =
3314 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3315 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3315 summary->set_in(0, Location::RequiresFpuRegister()); 3316 summary->set_in(0, Location::RequiresFpuRegister());
3316 summary->set_out(Location::SameAsFirstInput()); 3317 summary->set_out(Location::SameAsFirstInput());
3317 return summary; 3318 return summary;
3318 } 3319 }
3319 3320
3320 3321
3321 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { 3322 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3322 // NOP. 3323 // NOP.
3323 } 3324 }
3324 3325
3325 3326
3327 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const {
3328 const intptr_t kNumInputs = 4;
3329 const intptr_t kNumTemps = 1;
3330 LocationSummary* summary =
3331 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3332 summary->set_in(0, Location::RequiresRegister());
3333 summary->set_in(1, Location::RequiresRegister());
3334 summary->set_in(2, Location::RequiresRegister());
3335 summary->set_in(3, Location::RequiresRegister());
3336 summary->set_temp(0, Location::RequiresRegister());
3337 summary->set_out(Location::RequiresFpuRegister());
3338 return summary;
3339 }
3340
3341
3342 void Uint32x4BoolConstructorInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3343 Register v0 = locs()->in(0).reg();
3344 Register v1 = locs()->in(1).reg();
3345 Register v2 = locs()->in(2).reg();
3346 Register v3 = locs()->in(3).reg();
3347 Register temp = locs()->temp(0).reg();
3348 XmmRegister result = locs()->out().fpu_reg();
3349 Label x_false, x_done;
3350 Label y_false, y_done;
3351 Label z_false, z_done;
3352 Label w_false, w_done;
3353 __ subq(RSP, Immediate(16));
3354
3355 __ CompareObject(v0, Bool::True());
3356 __ j(NOT_EQUAL, &x_false);
3357 __ movq(temp, Immediate(0xFFFFFFFF));
3358 __ jmp(&x_done);
3359 __ Bind(&x_false);
3360 __ movq(temp, Immediate(0x0));
3361 __ Bind(&x_done);
3362 __ movl(Address(RSP, 0), temp);
3363
3364 __ CompareObject(v1, Bool::True());
3365 __ j(NOT_EQUAL, &y_false);
3366 __ movq(temp, Immediate(0xFFFFFFFF));
3367 __ jmp(&y_done);
3368 __ Bind(&y_false);
3369 __ movq(temp, Immediate(0x0));
3370 __ Bind(&y_done);
3371 __ movl(Address(RSP, 4), temp);
3372
3373 __ CompareObject(v2, Bool::True());
3374 __ j(NOT_EQUAL, &z_false);
3375 __ movq(temp, Immediate(0xFFFFFFFF));
3376 __ jmp(&z_done);
3377 __ Bind(&z_false);
3378 __ movq(temp, Immediate(0x0));
3379 __ Bind(&z_done);
3380 __ movl(Address(RSP, 8), temp);
3381
3382 __ CompareObject(v3, Bool::True());
3383 __ j(NOT_EQUAL, &w_false);
3384 __ movq(temp, Immediate(0xFFFFFFFF));
3385 __ jmp(&w_done);
3386 __ Bind(&w_false);
3387 __ movq(temp, Immediate(0x0));
3388 __ Bind(&w_done);
3389 __ movl(Address(RSP, 12), temp);
3390
3391 __ movups(result, Address(RSP, 0));
3392 __ addq(RSP, Immediate(16));
3393 }
3394
3395
3396 LocationSummary* Uint32x4GetFlagInstr::MakeLocationSummary() const {
3397 const intptr_t kNumInputs = 1;
3398 const intptr_t kNumTemps = 0;
3399 LocationSummary* summary =
3400 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3401 summary->set_in(0, Location::RequiresFpuRegister());
3402 summary->set_out(Location::RequiresRegister());
3403 return summary;
3404 }
3405
3406
3407 void Uint32x4GetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3408 XmmRegister value = locs()->in(0).fpu_reg();
3409 Register result = locs()->out().reg();
3410 Label done;
3411 Label non_zero;
3412 __ movmskps(result, value);
3413 switch (op_kind()) {
3414 case MethodRecognizer::kUint32x4GetFlagX:
3415 __ testl(result, Immediate(0x1));
3416 break;
3417 case MethodRecognizer::kUint32x4GetFlagY:
3418 __ testl(result, Immediate(0x2));
3419 break;
3420 case MethodRecognizer::kUint32x4GetFlagZ:
3421 __ testl(result, Immediate(0x3));
3422 break;
3423 case MethodRecognizer::kUint32x4GetFlagW:
3424 __ testl(result, Immediate(0x4));
3425 break;
3426 default: UNREACHABLE();
3427 }
3428 __ j(NOT_ZERO, &non_zero, Assembler::kNearJump);
3429 __ LoadObject(result, Bool::False());
3430 __ jmp(&done);
3431 __ Bind(&non_zero);
3432 __ LoadObject(result, Bool::True());
3433 __ Bind(&done);
3434 }
3435
3436
3437 LocationSummary* Uint32x4SelectInstr::MakeLocationSummary() const {
3438 const intptr_t kNumInputs = 3;
3439 const intptr_t kNumTemps = 1;
3440 LocationSummary* summary =
3441 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3442 summary->set_in(0, Location::RequiresFpuRegister());
3443 summary->set_in(1, Location::RequiresFpuRegister());
3444 summary->set_in(2, Location::RequiresFpuRegister());
3445 summary->set_temp(0, Location::RequiresFpuRegister());
3446 summary->set_out(Location::SameAsFirstInput());
3447 return summary;
3448 }
3449
3450
3451 void Uint32x4SelectInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3452 XmmRegister mask = locs()->in(0).fpu_reg();
3453 XmmRegister trueValue = locs()->in(1).fpu_reg();
3454 XmmRegister falseValue = locs()->in(2).fpu_reg();
3455 XmmRegister out = locs()->out().fpu_reg();
3456 XmmRegister temp = locs()->temp(0).fpu_reg();
3457 ASSERT(out == mask);
3458 // Copy mask.
3459 __ movaps(temp, mask);
3460 // Invert it.
3461 __ notps(temp);
3462 // mask = mask & trueValue.
3463 __ andps(mask, trueValue);
3464 // temp = temp & falseValue.
3465 __ andps(temp, falseValue);
3466 // out = mask | temp.
3467 __ orps(mask, temp);
3468 }
3469
3470
3471 LocationSummary* Uint32x4SetFlagInstr::MakeLocationSummary() const {
3472 const intptr_t kNumInputs = 2;
3473 const intptr_t kNumTemps = 1;
3474 LocationSummary* summary =
3475 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3476 summary->set_in(0, Location::RequiresFpuRegister());
3477 summary->set_in(1, Location::RequiresRegister());
3478 summary->set_temp(0, Location::RequiresRegister());
3479 summary->set_out(Location::SameAsFirstInput());
3480 return summary;
3481 }
3482
3483
3484 void Uint32x4SetFlagInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3485 XmmRegister mask = locs()->in(0).fpu_reg();
3486 Register flag = locs()->in(1).reg();
3487 Register temp = locs()->temp(0).reg();
3488 ASSERT(mask == locs()->out().fpu_reg());
3489 __ subq(RSP, Immediate(16));
3490 // Copy mask to stack.
3491 __ movups(Address(RSP, 0), mask);
3492 Label falsePath, exitPath;
3493 __ CompareObject(flag, Bool::True());
3494 __ j(NOT_EQUAL, &falsePath);
3495 switch (op_kind()) {
3496 case MethodRecognizer::kUint32x4WithFlagX:
3497 __ movq(temp, Immediate(0xFFFFFFFF));
3498 __ movl(Address(RSP, 0), temp);
3499 __ jmp(&exitPath);
3500 __ Bind(&falsePath);
3501 __ movq(temp, Immediate(0x0));
3502 __ movl(Address(RSP, 0), temp);
3503 break;
3504 case MethodRecognizer::kUint32x4WithFlagY:
3505 __ movq(temp, Immediate(0xFFFFFFFF));
3506 __ movl(Address(RSP, 4), temp);
3507 __ jmp(&exitPath);
3508 __ Bind(&falsePath);
3509 __ movq(temp, Immediate(0x0));
3510 __ movl(Address(RSP, 4), temp);
3511 break;
3512 case MethodRecognizer::kUint32x4WithFlagZ:
3513 __ movq(temp, Immediate(0xFFFFFFFF));
3514 __ movl(Address(RSP, 8), temp);
3515 __ jmp(&exitPath);
3516 __ Bind(&falsePath);
3517 __ movq(temp, Immediate(0x0));
3518 __ movl(Address(RSP, 8), temp);
3519 break;
3520 case MethodRecognizer::kUint32x4WithFlagW:
3521 __ movq(temp, Immediate(0xFFFFFFFF));
3522 __ movl(Address(RSP, 12), temp);
3523 __ jmp(&exitPath);
3524 __ Bind(&falsePath);
3525 __ movq(temp, Immediate(0x0));
3526 __ movl(Address(RSP, 12), temp);
3527 break;
3528 default: UNREACHABLE();
3529 }
3530 __ Bind(&exitPath);
3531 // Copy mask back to register.
3532 __ movups(mask, Address(RSP, 0));
3533 __ addq(RSP, Immediate(16));
3534 }
3535
3536
3537 LocationSummary* Uint32x4ToFloat32x4Instr::MakeLocationSummary() const {
3538 const intptr_t kNumInputs = 1;
3539 const intptr_t kNumTemps = 0;
3540 LocationSummary* summary =
3541 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3542 summary->set_in(0, Location::RequiresFpuRegister());
3543 summary->set_out(Location::SameAsFirstInput());
3544 return summary;
3545 }
3546
3547
3548 void Uint32x4ToFloat32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3549 // NOP.
3550 }
3551
3552
3553 LocationSummary* BinaryUint32x4OpInstr::MakeLocationSummary() const {
3554 const intptr_t kNumInputs = 2;
3555 const intptr_t kNumTemps = 0;
3556 LocationSummary* summary =
3557 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3558 summary->set_in(0, Location::RequiresFpuRegister());
3559 summary->set_in(1, Location::RequiresFpuRegister());
3560 summary->set_out(Location::SameAsFirstInput());
3561 return summary;
3562 }
3563
3564
3565 void BinaryUint32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3566 XmmRegister left = locs()->in(0).fpu_reg();
3567 XmmRegister right = locs()->in(1).fpu_reg();
3568 ASSERT(left == locs()->out().fpu_reg());
3569 switch (op_kind()) {
3570 case Token::kBIT_AND: {
3571 __ andps(left, right);
3572 break;
3573 }
3574 case Token::kBIT_OR: {
3575 __ orps(left, right);
3576 break;
3577 }
3578 case Token::kBIT_XOR: {
3579 __ xorps(left, right);
3580 break;
3581 }
3582 default: UNREACHABLE();
3583 }
3584 }
3585
3586
3326 LocationSummary* MathSqrtInstr::MakeLocationSummary() const { 3587 LocationSummary* MathSqrtInstr::MakeLocationSummary() const {
3327 const intptr_t kNumInputs = 1; 3588 const intptr_t kNumInputs = 1;
3328 const intptr_t kNumTemps = 0; 3589 const intptr_t kNumTemps = 0;
3329 LocationSummary* summary = 3590 LocationSummary* summary =
3330 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3591 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3331 summary->set_in(0, Location::RequiresFpuRegister()); 3592 summary->set_in(0, Location::RequiresFpuRegister());
3332 summary->set_out(Location::RequiresFpuRegister()); 3593 summary->set_out(Location::RequiresFpuRegister());
3333 return summary; 3594 return summary;
3334 } 3595 }
3335 3596
(...skipping 716 matching lines...) Expand 10 before | Expand all | Expand 10 after
4052 PcDescriptors::kOther, 4313 PcDescriptors::kOther,
4053 locs()); 4314 locs());
4054 __ Drop(2); // Discard type arguments and receiver. 4315 __ Drop(2); // Discard type arguments and receiver.
4055 } 4316 }
4056 4317
4057 } // namespace dart 4318 } // namespace dart
4058 4319
4059 #undef __ 4320 #undef __
4060 4321
4061 #endif // defined TARGET_ARCH_X64 4322 #endif // defined TARGET_ARCH_X64
OLDNEW
« runtime/vm/intermediate_language.h ('K') | « runtime/vm/intermediate_language_mips.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698