Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(39)

Side by Side Diff: runtime/vm/stub_code_arm64.cc

Issue 2647913002: Optimizations to IC stub for unoptimized code performance on x64. (Closed)
Patch Set: Feedback from Regis Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « runtime/vm/stub_code_arm.cc ('k') | runtime/vm/stub_code_ia32.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" 5 #include "vm/globals.h"
6 #if defined(TARGET_ARCH_ARM64) 6 #if defined(TARGET_ARCH_ARM64)
7 7
8 #include "vm/assembler.h" 8 #include "vm/assembler.h"
9 #include "vm/code_generator.h" 9 #include "vm/code_generator.h"
10 #include "vm/compiler.h" 10 #include "vm/compiler.h"
(...skipping 1362 matching lines...) Expand 10 before | Expand all | Expand 10 after
1373 __ b(&error, NE); 1373 __ b(&error, NE);
1374 __ ldr(R1, Address(R6, kWordSize)); 1374 __ ldr(R1, Address(R6, kWordSize));
1375 __ CompareImmediate(R1, imm_smi_cid); 1375 __ CompareImmediate(R1, imm_smi_cid);
1376 __ b(&ok, EQ); 1376 __ b(&ok, EQ);
1377 __ Bind(&error); 1377 __ Bind(&error);
1378 __ Stop("Incorrect IC data"); 1378 __ Stop("Incorrect IC data");
1379 __ Bind(&ok); 1379 __ Bind(&ok);
1380 #endif 1380 #endif
1381 if (FLAG_optimization_counter_threshold >= 0) { 1381 if (FLAG_optimization_counter_threshold >= 0) {
1382 const intptr_t count_offset = ICData::CountIndexFor(num_args) * kWordSize; 1382 const intptr_t count_offset = ICData::CountIndexFor(num_args) * kWordSize;
1383 // Update counter. 1383 // Update counter, ignore overflow.
1384 __ LoadFromOffset(R1, R6, count_offset); 1384 __ LoadFromOffset(R1, R6, count_offset);
1385 __ adds(R1, R1, Operand(Smi::RawValue(1))); 1385 __ adds(R1, R1, Operand(Smi::RawValue(1)));
1386 __ LoadImmediate(R2, Smi::RawValue(Smi::kMaxValue));
1387 __ csel(R1, R2, R1, VS); // Overflow.
1388 __ StoreToOffset(R1, R6, count_offset); 1386 __ StoreToOffset(R1, R6, count_offset);
1389 } 1387 }
1390 1388
1391 __ ret(); 1389 __ ret();
1392 } 1390 }
1393 1391
1394 1392
1395 // Generate inline cache check for 'num_args'. 1393 // Generate inline cache check for 'num_args'.
1396 // LR: return address. 1394 // LR: return address.
1397 // R5: inline cache data object. 1395 // R5: inline cache data object.
1398 // Control flow: 1396 // Control flow:
1399 // - If receiver is null -> jump to IC miss. 1397 // - If receiver is null -> jump to IC miss.
1400 // - If receiver is Smi -> load Smi class. 1398 // - If receiver is Smi -> load Smi class.
1401 // - If receiver is not-Smi -> load receiver's class. 1399 // - If receiver is not-Smi -> load receiver's class.
1402 // - Check if 'num_args' (including receiver) match any IC data group. 1400 // - Check if 'num_args' (including receiver) match any IC data group.
1403 // - Match found -> jump to target. 1401 // - Match found -> jump to target.
1404 // - Match not found -> jump to IC miss. 1402 // - Match not found -> jump to IC miss.
1405 void StubCode::GenerateNArgsCheckInlineCacheStub( 1403 void StubCode::GenerateNArgsCheckInlineCacheStub(
1406 Assembler* assembler, 1404 Assembler* assembler,
1407 intptr_t num_args, 1405 intptr_t num_args,
1408 const RuntimeEntry& handle_ic_miss, 1406 const RuntimeEntry& handle_ic_miss,
1409 Token::Kind kind, 1407 Token::Kind kind,
1410 bool optimized) { 1408 bool optimized) {
1411 ASSERT(num_args > 0); 1409 ASSERT(num_args == 1 || num_args == 2);
1412 #if defined(DEBUG) 1410 #if defined(DEBUG)
1413 { 1411 {
1414 Label ok; 1412 Label ok;
1415 // Check that the IC data array has NumArgsTested() == num_args. 1413 // Check that the IC data array has NumArgsTested() == num_args.
1416 // 'NumArgsTested' is stored in the least significant bits of 'state_bits'. 1414 // 'NumArgsTested' is stored in the least significant bits of 'state_bits'.
1417 __ LoadFromOffset(R6, R5, ICData::state_bits_offset() - kHeapObjectTag, 1415 __ LoadFromOffset(R6, R5, ICData::state_bits_offset() - kHeapObjectTag,
1418 kUnsignedWord); 1416 kUnsignedWord);
1419 ASSERT(ICData::NumArgsTestedShift() == 0); // No shift needed. 1417 ASSERT(ICData::NumArgsTestedShift() == 0); // No shift needed.
1420 __ andi(R6, R6, Immediate(ICData::NumArgsTestedMask())); 1418 __ andi(R6, R6, Immediate(ICData::NumArgsTestedMask()));
1421 __ CompareImmediate(R6, num_args); 1419 __ CompareImmediate(R6, num_args);
(...skipping 16 matching lines...) Expand all
1438 Label not_smi_or_overflow; 1436 Label not_smi_or_overflow;
1439 if (kind != Token::kILLEGAL) { 1437 if (kind != Token::kILLEGAL) {
1440 EmitFastSmiOp(assembler, kind, num_args, &not_smi_or_overflow); 1438 EmitFastSmiOp(assembler, kind, num_args, &not_smi_or_overflow);
1441 } 1439 }
1442 __ Bind(&not_smi_or_overflow); 1440 __ Bind(&not_smi_or_overflow);
1443 1441
1444 __ Comment("Extract ICData initial values and receiver cid"); 1442 __ Comment("Extract ICData initial values and receiver cid");
1445 // Load arguments descriptor into R4. 1443 // Load arguments descriptor into R4.
1446 __ LoadFieldFromOffset(R4, R5, ICData::arguments_descriptor_offset()); 1444 __ LoadFieldFromOffset(R4, R5, ICData::arguments_descriptor_offset());
1447 // Loop that checks if there is an IC data match. 1445 // Loop that checks if there is an IC data match.
1448 Label loop, update, test, found; 1446 Label loop, found, miss;
1449 // R5: IC data object (preserved). 1447 // R5: IC data object (preserved).
1450 __ LoadFieldFromOffset(R6, R5, ICData::ic_data_offset()); 1448 __ LoadFieldFromOffset(R6, R5, ICData::ic_data_offset());
1451 // R6: ic_data_array with check entries: classes and target functions. 1449 // R6: ic_data_array with check entries: classes and target functions.
1452 __ AddImmediate(R6, R6, Array::data_offset() - kHeapObjectTag); 1450 __ AddImmediate(R6, R6, Array::data_offset() - kHeapObjectTag);
1453 // R6: points directly to the first ic data array element. 1451 // R6: points directly to the first ic data array element.
1454 1452
1455 // Get the receiver's class ID (first read number of arguments from 1453 // Get the receiver's class ID (first read number of arguments from
1456 // arguments descriptor array and then access the receiver from the stack). 1454 // arguments descriptor array and then access the receiver from the stack).
1457 __ LoadFieldFromOffset(R7, R4, ArgumentsDescriptor::count_offset()); 1455 __ LoadFieldFromOffset(R7, R4, ArgumentsDescriptor::count_offset());
1458 __ SmiUntag(R7); // Untag so we can use the LSL 3 addressing mode. 1456 __ SmiUntag(R7); // Untag so we can use the LSL 3 addressing mode.
1459 __ sub(R7, R7, Operand(1)); 1457 __ sub(R7, R7, Operand(1));
1460 1458
1461 // R0 <- [SP + (R7 << 3)] 1459 // R0 <- [SP + (R7 << 3)]
1462 __ ldr(R0, Address(SP, R7, UXTX, Address::Scaled)); 1460 __ ldr(R0, Address(SP, R7, UXTX, Address::Scaled));
1463 __ LoadTaggedClassIdMayBeSmi(R0, R0); 1461 __ LoadTaggedClassIdMayBeSmi(R0, R0);
1464 1462
1465 // R7: argument_count - 1 (untagged). 1463 if (num_args == 2) {
1466 // R0: receiver's class ID (smi). 1464 __ AddImmediate(R1, R7, -1);
1467 __ ldr(R1, Address(R6)); // First class id (smi) to check. 1465 // R1 <- [SP + (R1 << 3)]
1468 __ b(&test); 1466 __ ldr(R1, Address(SP, R1, UXTX, Address::Scaled));
1467 __ LoadTaggedClassIdMayBeSmi(R1, R1);
1468 }
1469
1470 // We unroll the generic one that is generated once more than the others.
1471 bool optimize = kind == Token::kILLEGAL;
1469 1472
1470 __ Comment("ICData loop"); 1473 __ Comment("ICData loop");
1471 __ Bind(&loop); 1474 __ Bind(&loop);
1472 for (int i = 0; i < num_args; i++) { 1475 for (int unroll = optimize ? 4 : 2; unroll >= 0; unroll--) {
1473 if (i > 0) { 1476 Label update;
1474 // If not the first, load the next argument's class ID. 1477
1475 __ AddImmediate(R0, R7, -i); 1478 __ LoadFromOffset(R2, R6, 0);
1476 // R0 <- [SP + (R0 << 3)] 1479 __ CompareRegisters(R0, R2); // Class id match?
1477 __ ldr(R0, Address(SP, R0, UXTX, Address::Scaled)); 1480 if (num_args == 2) {
1478 __ LoadTaggedClassIdMayBeSmi(R0, R0); 1481 __ b(&update, NE); // Continue.
1479 // R0: next argument class ID (smi). 1482 __ LoadFromOffset(R2, R6, kWordSize);
1480 __ LoadFromOffset(R1, R6, i * kWordSize); 1483 __ CompareRegisters(R1, R2); // Class id match?
1481 // R1: next class ID to check (smi).
1482 } 1484 }
1483 __ CompareRegisters(R0, R1); // Class id match? 1485 __ b(&found, EQ); // Break.
1484 if (i < (num_args - 1)) { 1486
1485 __ b(&update, NE); // Continue. 1487 __ Bind(&update);
1488
1489 const intptr_t entry_size =
1490 ICData::TestEntryLengthFor(num_args) * kWordSize;
1491 __ AddImmediate(R6, R6, entry_size); // Next entry.
1492
1493 __ CompareImmediate(R2, Smi::RawValue(kIllegalCid)); // Done?
1494 if (unroll == 0) {
1495 __ b(&loop, NE);
1486 } else { 1496 } else {
1487 // Last check, all checks before matched. 1497 __ b(&miss, EQ);
1488 __ b(&found, EQ); // Break.
1489 } 1498 }
1490 } 1499 }
1491 __ Bind(&update);
1492 // Reload receiver class ID. It has not been destroyed when num_args == 1.
1493 if (num_args > 1) {
1494 __ ldr(R0, Address(SP, R7, UXTX, Address::Scaled));
1495 __ LoadTaggedClassIdMayBeSmi(R0, R0);
1496 }
1497 1500
1498 const intptr_t entry_size = ICData::TestEntryLengthFor(num_args) * kWordSize; 1501 __ Bind(&miss);
1499 __ AddImmediate(R6, R6, entry_size); // Next entry.
1500 __ ldr(R1, Address(R6)); // Next class ID.
1501
1502 __ Bind(&test);
1503 __ CompareImmediate(R1, Smi::RawValue(kIllegalCid)); // Done?
1504 __ b(&loop, NE);
1505
1506 __ Comment("IC miss"); 1502 __ Comment("IC miss");
1507 // Compute address of arguments. 1503 // Compute address of arguments.
1508 // R7: argument_count - 1 (untagged). 1504 // R7: argument_count - 1 (untagged).
1509 // R7 <- SP + (R7 << 3) 1505 // R7 <- SP + (R7 << 3)
1510 __ add(R7, SP, Operand(R7, UXTX, 3)); // R7 is Untagged. 1506 __ add(R7, SP, Operand(R7, UXTX, 3)); // R7 is Untagged.
1511 // R7: address of receiver. 1507 // R7: address of receiver.
1512 // Create a stub frame as we are pushing some objects on the stack before 1508 // Create a stub frame as we are pushing some objects on the stack before
1513 // calling into the runtime. 1509 // calling into the runtime.
1514 __ EnterStubFrame(); 1510 __ EnterStubFrame();
1515 // Preserve IC data object and arguments descriptor array and 1511 // Preserve IC data object and arguments descriptor array and
(...skipping 27 matching lines...) Expand all
1543 } 1539 }
1544 1540
1545 __ Bind(&found); 1541 __ Bind(&found);
1546 __ Comment("Update caller's counter"); 1542 __ Comment("Update caller's counter");
1547 // R6: pointer to an IC data check group. 1543 // R6: pointer to an IC data check group.
1548 const intptr_t target_offset = ICData::TargetIndexFor(num_args) * kWordSize; 1544 const intptr_t target_offset = ICData::TargetIndexFor(num_args) * kWordSize;
1549 const intptr_t count_offset = ICData::CountIndexFor(num_args) * kWordSize; 1545 const intptr_t count_offset = ICData::CountIndexFor(num_args) * kWordSize;
1550 __ LoadFromOffset(R0, R6, target_offset); 1546 __ LoadFromOffset(R0, R6, target_offset);
1551 1547
1552 if (FLAG_optimization_counter_threshold >= 0) { 1548 if (FLAG_optimization_counter_threshold >= 0) {
1553 // Update counter. 1549 // Update counter, ignore overflow.
1554 __ LoadFromOffset(R1, R6, count_offset); 1550 __ LoadFromOffset(R1, R6, count_offset);
1555 __ adds(R1, R1, Operand(Smi::RawValue(1))); 1551 __ adds(R1, R1, Operand(Smi::RawValue(1)));
1556 __ LoadImmediate(R2, Smi::RawValue(Smi::kMaxValue));
1557 __ csel(R1, R2, R1, VS); // Overflow.
1558 __ StoreToOffset(R1, R6, count_offset); 1552 __ StoreToOffset(R1, R6, count_offset);
1559 } 1553 }
1560 1554
1561 __ Comment("Call target"); 1555 __ Comment("Call target");
1562 __ Bind(&call_target_function); 1556 __ Bind(&call_target_function);
1563 // R0: target function. 1557 // R0: target function.
1564 __ LoadFieldFromOffset(CODE_REG, R0, Function::code_offset()); 1558 __ LoadFieldFromOffset(CODE_REG, R0, Function::code_offset());
1565 __ LoadFieldFromOffset(R2, R0, Function::entry_point_offset()); 1559 __ LoadFieldFromOffset(R2, R0, Function::entry_point_offset());
1566 __ br(R2); 1560 __ br(R2);
1567 1561
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
1672 1666
1673 // R5: IC data object (preserved). 1667 // R5: IC data object (preserved).
1674 __ LoadFieldFromOffset(R6, R5, ICData::ic_data_offset()); 1668 __ LoadFieldFromOffset(R6, R5, ICData::ic_data_offset());
1675 // R6: ic_data_array with entries: target functions and count. 1669 // R6: ic_data_array with entries: target functions and count.
1676 __ AddImmediate(R6, R6, Array::data_offset() - kHeapObjectTag); 1670 __ AddImmediate(R6, R6, Array::data_offset() - kHeapObjectTag);
1677 // R6: points directly to the first ic data array element. 1671 // R6: points directly to the first ic data array element.
1678 const intptr_t target_offset = ICData::TargetIndexFor(0) * kWordSize; 1672 const intptr_t target_offset = ICData::TargetIndexFor(0) * kWordSize;
1679 const intptr_t count_offset = ICData::CountIndexFor(0) * kWordSize; 1673 const intptr_t count_offset = ICData::CountIndexFor(0) * kWordSize;
1680 1674
1681 if (FLAG_optimization_counter_threshold >= 0) { 1675 if (FLAG_optimization_counter_threshold >= 0) {
1682 // Increment count for this call. 1676 // Increment count for this call, ignore overflow.
1683 __ LoadFromOffset(R1, R6, count_offset); 1677 __ LoadFromOffset(R1, R6, count_offset);
1684 __ adds(R1, R1, Operand(Smi::RawValue(1))); 1678 __ adds(R1, R1, Operand(Smi::RawValue(1)));
1685 __ LoadImmediate(R2, Smi::RawValue(Smi::kMaxValue));
1686 __ csel(R1, R2, R1, VS); // Overflow.
1687 __ StoreToOffset(R1, R6, count_offset); 1679 __ StoreToOffset(R1, R6, count_offset);
1688 } 1680 }
1689 1681
1690 // Load arguments descriptor into R4. 1682 // Load arguments descriptor into R4.
1691 __ LoadFieldFromOffset(R4, R5, ICData::arguments_descriptor_offset()); 1683 __ LoadFieldFromOffset(R4, R5, ICData::arguments_descriptor_offset());
1692 1684
1693 // Get function and call it, if possible. 1685 // Get function and call it, if possible.
1694 __ LoadFromOffset(R0, R6, target_offset); 1686 __ LoadFromOffset(R0, R6, target_offset);
1695 __ LoadFieldFromOffset(CODE_REG, R0, Function::code_offset()); 1687 __ LoadFieldFromOffset(CODE_REG, R0, Function::code_offset());
1696 __ LoadFieldFromOffset(R2, R0, Function::entry_point_offset()); 1688 __ LoadFieldFromOffset(R2, R0, Function::entry_point_offset());
(...skipping 640 matching lines...) Expand 10 before | Expand all | Expand 10 after
2337 } 2329 }
2338 2330
2339 2331
2340 void StubCode::GenerateFrameAwaitingMaterializationStub(Assembler* assembler) { 2332 void StubCode::GenerateFrameAwaitingMaterializationStub(Assembler* assembler) {
2341 __ brk(0); 2333 __ brk(0);
2342 } 2334 }
2343 2335
2344 } // namespace dart 2336 } // namespace dart
2345 2337
2346 #endif // defined TARGET_ARCH_ARM64 2338 #endif // defined TARGET_ARCH_ARM64
OLDNEW
« no previous file with comments | « runtime/vm/stub_code_arm.cc ('k') | runtime/vm/stub_code_ia32.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698