Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/jsregexp.cc

Issue 14194: * Generate quick checks based on mask and compare for... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 12 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
207 class OffsetsVector { 207 class OffsetsVector {
208 public: 208 public:
209 inline OffsetsVector(int num_registers) 209 inline OffsetsVector(int num_registers)
210 : offsets_vector_length_(num_registers) { 210 : offsets_vector_length_(num_registers) {
211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { 211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
212 vector_ = NewArray<int>(offsets_vector_length_); 212 vector_ = NewArray<int>(offsets_vector_length_);
213 } else { 213 } else {
214 vector_ = static_offsets_vector_; 214 vector_ = static_offsets_vector_;
215 } 215 }
216 } 216 }
217
218
219 inline ~OffsetsVector() { 217 inline ~OffsetsVector() {
220 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { 218 if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
221 DeleteArray(vector_); 219 DeleteArray(vector_);
222 vector_ = NULL; 220 vector_ = NULL;
223 } 221 }
224 } 222 }
225 223 inline int* vector() { return vector_; }
226 224 inline int length() { return offsets_vector_length_; }
227 inline int* vector() {
228 return vector_;
229 }
230
231
232 inline int length() {
233 return offsets_vector_length_;
234 }
235 225
236 private: 226 private:
237 int* vector_; 227 int* vector_;
238 int offsets_vector_length_; 228 int offsets_vector_length_;
239 static const int kStaticOffsetsVectorSize = 50; 229 static const int kStaticOffsetsVectorSize = 50;
240 static int static_offsets_vector_[kStaticOffsetsVectorSize]; 230 static int static_offsets_vector_[kStaticOffsetsVectorSize];
241 }; 231 };
242 232
243 233
244 int OffsetsVector::static_offsets_vector_[ 234 int OffsetsVector::static_offsets_vector_[
(...skipping 551 matching lines...) Expand 10 before | Expand all | Expand 10 after
796 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); 786 int previous_index = static_cast<int>(DoubleToInteger(index->Number()));
797 787
798 #ifdef DEBUG 788 #ifdef DEBUG
799 if (FLAG_trace_regexp_bytecodes) { 789 if (FLAG_trace_regexp_bytecodes) {
800 String* pattern = regexp->Pattern(); 790 String* pattern = regexp->Pattern();
801 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 791 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
802 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 792 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
803 } 793 }
804 #endif 794 #endif
805 LOG(RegExpExecEvent(regexp, previous_index, subject)); 795 LOG(RegExpExecEvent(regexp, previous_index, subject));
796
797 if (!subject->IsFlat(StringShape(*subject))) {
798 FlattenString(subject);
799 }
800
806 return IrregexpExecOnce(irregexp, 801 return IrregexpExecOnce(irregexp,
807 num_captures, 802 num_captures,
808 subject, 803 subject,
809 previous_index, 804 previous_index,
810 offsets.vector(), 805 offsets.vector(),
811 offsets.length()); 806 offsets.length());
812 } 807 }
813 808
814 809
815 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, 810 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
(...skipping 14 matching lines...) Expand all
830 int previous_index = 0; 825 int previous_index = 0;
831 826
832 Handle<JSArray> result = Factory::NewJSArray(0); 827 Handle<JSArray> result = Factory::NewJSArray(0);
833 int i = 0; 828 int i = 0;
834 Handle<Object> matches; 829 Handle<Object> matches;
835 830
836 if (!subject->IsFlat(shape)) { 831 if (!subject->IsFlat(shape)) {
837 subject->Flatten(shape); 832 subject->Flatten(shape);
838 } 833 }
839 834
840 do { 835 while (true) {
841 if (previous_index > subject->length() || previous_index < 0) { 836 if (previous_index > subject->length() || previous_index < 0) {
842 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 837 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
843 // string length, there is no match. 838 // string length, there is no match.
844 matches = Factory::null_value(); 839 matches = Factory::null_value();
840 return result;
845 } else { 841 } else {
846 #ifdef DEBUG 842 #ifdef DEBUG
847 if (FLAG_trace_regexp_bytecodes) { 843 if (FLAG_trace_regexp_bytecodes) {
848 String* pattern = regexp->Pattern(); 844 String* pattern = regexp->Pattern();
849 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); 845 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
850 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); 846 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
851 } 847 }
852 #endif 848 #endif
853 LOG(RegExpExecEvent(regexp, previous_index, subject)); 849 LOG(RegExpExecEvent(regexp, previous_index, subject));
854 matches = IrregexpExecOnce(irregexp, 850 matches = IrregexpExecOnce(irregexp,
855 IrregexpNumberOfCaptures(irregexp), 851 IrregexpNumberOfCaptures(irregexp),
856 subject, 852 subject,
857 previous_index, 853 previous_index,
858 offsets.vector(), 854 offsets.vector(),
859 offsets.length()); 855 offsets.length());
860 856
861 if (matches->IsJSArray()) { 857 if (matches->IsJSArray()) {
862 SetElement(result, i, matches); 858 SetElement(result, i, matches);
863 i++; 859 i++;
864 previous_index = offsets.vector()[1]; 860 previous_index = offsets.vector()[1];
865 if (offsets.vector()[0] == offsets.vector()[1]) { 861 if (offsets.vector()[0] == offsets.vector()[1]) {
866 previous_index++; 862 previous_index++;
867 } 863 }
864 } else if (matches->IsNull()) {
865 return result;
866 } else {
867 return matches;
868 } 868 }
869 } 869 }
870 } while (matches->IsJSArray());
871
872 // If we exited the loop with an exception, throw it.
873 if (matches->IsNull()) {
874 // Exited loop normally.
875 return result;
876 } else {
877 // Exited loop with the exception in matches.
878 return matches;
879 } 870 }
880 } 871 }
881 872
882 873
883 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, 874 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
884 int num_captures, 875 int num_captures,
885 Handle<String> subject, 876 Handle<String> subject,
886 int previous_index, 877 int previous_index,
887 int* offsets_vector, 878 int* offsets_vector,
888 int offsets_vector_length) { 879 int offsets_vector_length) {
880 ASSERT(subject->IsFlat(StringShape(*subject)));
889 bool rc; 881 bool rc;
890 882
891 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); 883 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
892 884
893 if (!subject->IsFlat(StringShape(*subject))) {
894 FlattenString(subject);
895 }
896
897 switch (tag) { 885 switch (tag) {
898 case RegExpMacroAssembler::kIA32Implementation: { 886 case RegExpMacroAssembler::kIA32Implementation: {
899 #ifndef ARM 887 #ifndef ARM
900 Handle<Code> code = IrregexpNativeCode(irregexp); 888 Handle<Code> code = IrregexpNativeCode(irregexp);
901 889
902 StringShape shape(*subject); 890 StringShape shape(*subject);
903 891
904 // Character offsets into string. 892 // Character offsets into string.
905 int start_offset = previous_index; 893 int start_offset = previous_index;
906 int end_offset = subject->length(shape); 894 int end_offset = subject->length(shape);
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
990 rc = false; 978 rc = false;
991 break; 979 break;
992 } 980 }
993 981
994 if (!rc) { 982 if (!rc) {
995 return Factory::null_value(); 983 return Factory::null_value();
996 } 984 }
997 985
998 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); 986 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
999 // The captures come in (start, end+1) pairs. 987 // The captures come in (start, end+1) pairs.
1000 for (int i = 0; i < 2 * (num_captures+1); i += 2) { 988 for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
1001 array->set(i, Smi::FromInt(offsets_vector[i])); 989 array->set(i, Smi::FromInt(offsets_vector[i]));
1002 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); 990 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
1003 } 991 }
1004 return Factory::NewJSArrayWithElements(array); 992 return Factory::NewJSArrayWithElements(array);
1005 } 993 }
1006 994
1007 995
1008 // ------------------------------------------------------------------- 996 // -------------------------------------------------------------------
1009 // Implmentation of the Irregexp regular expression engine. 997 // Implmentation of the Irregexp regular expression engine.
1010 // 998 //
1011 // The Irregexp regular expression engine is intended to be a complete 999 // The Irregexp regular expression engine is intended to be a complete
1012 // implementation of ECMAScript regular expressions. It generates either 1000 // implementation of ECMAScript regular expressions. It generates either
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after
1337 for (DeferredAction* action = actions_; 1325 for (DeferredAction* action = actions_;
1338 action != NULL; 1326 action != NULL;
1339 action = action->next()) { 1327 action = action->next()) {
1340 affected_registers->Set(action->reg()); 1328 affected_registers->Set(action->reg());
1341 if (action->reg() > max_register) max_register = action->reg(); 1329 if (action->reg() > max_register) max_register = action->reg();
1342 } 1330 }
1343 return max_register; 1331 return max_register;
1344 } 1332 }
1345 1333
1346 1334
1347 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro, 1335 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler,
1348 int max_register, 1336 int max_register,
1349 OutSet& affected_registers) { 1337 OutSet& affected_registers) {
1350 for (int reg = 0; reg <= max_register; reg++) { 1338 for (int reg = 0; reg <= max_register; reg++) {
1351 if (affected_registers.Get(reg)) macro->PushRegister(reg); 1339 if (affected_registers.Get(reg)) assembler->PushRegister(reg);
1352 } 1340 }
1353 } 1341 }
1354 1342
1355 1343
1356 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro, 1344 void GenerationVariant::RestoreAffectedRegisters(
1357 int max_register, 1345 RegExpMacroAssembler* assembler,
1358 OutSet& affected_registers) { 1346 int max_register,
1347 OutSet& affected_registers) {
1359 for (int reg = max_register; reg >= 0; reg--) { 1348 for (int reg = max_register; reg >= 0; reg--) {
1360 if (affected_registers.Get(reg)) macro->PopRegister(reg); 1349 if (affected_registers.Get(reg)) assembler->PopRegister(reg);
1361 } 1350 }
1362 } 1351 }
1363 1352
1364 1353
1365 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro, 1354 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler,
1366 int max_register, 1355 int max_register,
1367 OutSet& affected_registers) { 1356 OutSet& affected_registers) {
1368 for (int reg = 0; reg <= max_register; reg++) { 1357 for (int reg = 0; reg <= max_register; reg++) {
1369 if (!affected_registers.Get(reg)) { 1358 if (!affected_registers.Get(reg)) {
1370 continue; 1359 continue;
1371 } 1360 }
1372 int value = 0; 1361 int value = 0;
1373 bool absolute = false; 1362 bool absolute = false;
1374 int store_position = -1; 1363 int store_position = -1;
1375 // This is a little tricky because we are scanning the actions in reverse 1364 // This is a little tricky because we are scanning the actions in reverse
(...skipping 27 matching lines...) Expand all
1403 ASSERT_EQ(value, 0); 1392 ASSERT_EQ(value, 0);
1404 break; 1393 break;
1405 } 1394 }
1406 default: 1395 default:
1407 UNREACHABLE(); 1396 UNREACHABLE();
1408 break; 1397 break;
1409 } 1398 }
1410 } 1399 }
1411 } 1400 }
1412 if (store_position != -1) { 1401 if (store_position != -1) {
1413 macro->WriteCurrentPositionToRegister(reg, store_position); 1402 assembler->WriteCurrentPositionToRegister(reg, store_position);
1414 } else { 1403 } else {
1415 if (absolute) { 1404 if (absolute) {
1416 macro->SetRegister(reg, value); 1405 assembler->SetRegister(reg, value);
1417 } else { 1406 } else {
1418 if (value != 0) { 1407 if (value != 0) {
1419 macro->AdvanceRegister(reg, value); 1408 assembler->AdvanceRegister(reg, value);
1420 } 1409 }
1421 } 1410 }
1422 } 1411 }
1423 } 1412 }
1424 } 1413 }
1425 1414
1426 1415
1427 // This is called as we come into a loop choice node and some other tricky 1416 // This is called as we come into a loop choice node and some other tricky
1428 // nodes. It normalises the state of the code generator to ensure we can 1417 // nodes. It normalises the state of the code generator to ensure we can
1429 // generate generic code. 1418 // generate generic code.
1430 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1431 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1420 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1432 1421
1433 ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL); 1422 ASSERT(actions_ != NULL ||
1423 cp_offset_ != 0 ||
1424 backtrack() != NULL ||
1425 characters_preloaded_ != 0 ||
1426 quick_check_performed_.characters() != 0);
1434 1427
1435 if (actions_ == NULL && backtrack() == NULL) { 1428 if (actions_ == NULL && backtrack() == NULL) {
1436 // Here we just have some deferred cp advances to fix and we are back to 1429 // Here we just have some deferred cp advances to fix and we are back to
1437 // a normal situation. 1430 // a normal situation. We may also have to forget some information gained
1438 macro->AdvanceCurrentPosition(cp_offset_); 1431 // through a quick check that was already performed.
1432 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1439 // Create a new trivial state and generate the node with that. 1433 // Create a new trivial state and generate the node with that.
1440 GenerationVariant new_state; 1434 GenerationVariant new_state;
1441 return successor->Emit(compiler, &new_state); 1435 return successor->Emit(compiler, &new_state);
1442 } 1436 }
1443 1437
1444 // Generate deferred actions here along with code to undo them again. 1438 // Generate deferred actions here along with code to undo them again.
1445 OutSet affected_registers; 1439 OutSet affected_registers;
1446 int max_register = FindAffectedRegisters(&affected_registers); 1440 int max_register = FindAffectedRegisters(&affected_registers);
1447 PushAffectedRegisters(macro, max_register, affected_registers); 1441 PushAffectedRegisters(assembler, max_register, affected_registers);
1448 PerformDeferredActions(macro, max_register, affected_registers); 1442 PerformDeferredActions(assembler, max_register, affected_registers);
1449 if (backtrack() != NULL) { 1443 if (backtrack() != NULL) {
1450 // Here we have a concrete backtrack location. These are set up by choice 1444 // Here we have a concrete backtrack location. These are set up by choice
1451 // nodes and so they indicate that we have a deferred save of the current 1445 // nodes and so they indicate that we have a deferred save of the current
1452 // position which we may need to emit here. 1446 // position which we may need to emit here.
1453 macro->PushCurrentPosition(); 1447 assembler->PushCurrentPosition();
1454 } 1448 }
1455 if (cp_offset_ != 0) { 1449 if (cp_offset_ != 0) {
1456 macro->AdvanceCurrentPosition(cp_offset_); 1450 assembler->AdvanceCurrentPosition(cp_offset_);
1457 } 1451 }
1458 1452
1459 // Create a new trivial state and generate the node with that. 1453 // Create a new trivial state and generate the node with that.
1460 Label undo; 1454 Label undo;
1461 macro->PushBacktrack(&undo); 1455 assembler->PushBacktrack(&undo);
1462 GenerationVariant new_state; 1456 GenerationVariant new_state;
1463 bool ok = successor->Emit(compiler, &new_state); 1457 bool ok = successor->Emit(compiler, &new_state);
1464 1458
1465 // On backtrack we need to restore state. 1459 // On backtrack we need to restore state.
1466 macro->Bind(&undo); 1460 assembler->Bind(&undo);
1467 if (!ok) return false; 1461 if (!ok) return false;
1468 if (backtrack() != NULL) { 1462 if (backtrack() != NULL) {
1469 macro->PopCurrentPosition(); 1463 assembler->PopCurrentPosition();
1470 } 1464 }
1471 RestoreAffectedRegisters(macro, max_register, affected_registers); 1465 RestoreAffectedRegisters(assembler, max_register, affected_registers);
1472 if (backtrack() == NULL) { 1466 if (backtrack() == NULL) {
1473 macro->Backtrack(); 1467 assembler->Backtrack();
1474 } else { 1468 } else {
1475 macro->GoTo(backtrack()); 1469 assembler->GoTo(backtrack());
1476 } 1470 }
1477 1471
1478 return true; 1472 return true;
1479 } 1473 }
1480 1474
1481 1475
1482 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro, 1476 void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler,
1483 GenerationVariant* variant) { 1477 GenerationVariant* variant) {
1484 if (info()->at_end) { 1478 if (info()->at_end) {
1485 Label succeed; 1479 Label succeed;
1486 // LoadCurrentCharacter will go to the label if we are at the end of the 1480 // LoadCurrentCharacter will go to the label if we are at the end of the
1487 // input string. 1481 // input string.
1488 macro->LoadCurrentCharacter(0, &succeed); 1482 assembler->LoadCurrentCharacter(0, &succeed);
1489 macro->GoTo(variant->backtrack()); 1483 assembler->GoTo(variant->backtrack());
1490 macro->Bind(&succeed); 1484 assembler->Bind(&succeed);
1491 } 1485 }
1492 } 1486 }
1493 1487
1494 1488
1495 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, 1489 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler,
1496 GenerationVariant* variant) { 1490 GenerationVariant* variant) {
1497 if (!variant->is_trivial()) { 1491 if (!variant->is_trivial()) {
1498 return variant->Flush(compiler, this); 1492 return variant->Flush(compiler, this);
1499 } 1493 }
1500 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1494 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1501 if (!label()->is_bound()) { 1495 if (!label()->is_bound()) {
1502 macro->Bind(label()); 1496 assembler->Bind(label());
1503 } 1497 }
1504 EmitInfoChecks(macro, variant); 1498 EmitInfoChecks(assembler, variant);
1505 macro->ReadCurrentPositionFromRegister(current_position_register_); 1499 assembler->ReadCurrentPositionFromRegister(current_position_register_);
1506 macro->ReadStackPointerFromRegister(stack_pointer_register_); 1500 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1507 // Now that we have unwound the stack we find at the top of the stack the 1501 // Now that we have unwound the stack we find at the top of the stack the
1508 // backtrack that the BeginSubmatch node got. 1502 // backtrack that the BeginSubmatch node got.
1509 macro->Backtrack(); 1503 assembler->Backtrack();
1510 return true; 1504 return true;
1511 } 1505 }
1512 1506
1513 1507
1514 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { 1508 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1515 if (!variant->is_trivial()) { 1509 if (!variant->is_trivial()) {
1516 return variant->Flush(compiler, this); 1510 return variant->Flush(compiler, this);
1517 } 1511 }
1518 RegExpMacroAssembler* macro = compiler->macro_assembler(); 1512 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1519 if (!label()->is_bound()) { 1513 if (!label()->is_bound()) {
1520 macro->Bind(label()); 1514 assembler->Bind(label());
1521 } 1515 }
1522 switch (action_) { 1516 switch (action_) {
1523 case ACCEPT: 1517 case ACCEPT:
1524 EmitInfoChecks(macro, variant); 1518 EmitInfoChecks(assembler, variant);
1525 macro->Succeed(); 1519 assembler->Succeed();
1526 return true; 1520 return true;
1527 case BACKTRACK: 1521 case BACKTRACK:
1528 ASSERT(!info()->at_end); 1522 ASSERT(!info()->at_end);
1529 macro->GoTo(variant->backtrack()); 1523 assembler->GoTo(variant->backtrack());
1530 return true; 1524 return true;
1531 case NEGATIVE_SUBMATCH_SUCCESS: 1525 case NEGATIVE_SUBMATCH_SUCCESS:
1532 // This case is handled in a different virtual method. 1526 // This case is handled in a different virtual method.
1533 UNREACHABLE(); 1527 UNREACHABLE();
1534 } 1528 }
1535 UNIMPLEMENTED(); 1529 UNIMPLEMENTED();
1536 return false; 1530 return false;
1537 } 1531 }
1538 1532
1539 1533
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
1622 variant->backtrack()); 1616 variant->backtrack());
1623 break; 1617 break;
1624 } 1618 }
1625 } 1619 }
1626 1620
1627 1621
1628 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; 1622 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
1629 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; 1623 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
1630 1624
1631 1625
1632 static inline void EmitAtomNonLetters( 1626 // Only emits non-letters (things that don't have case). Only used for case
1627 // independent matches.
1628 static inline bool EmitAtomNonLetter(
1633 RegExpMacroAssembler* macro_assembler, 1629 RegExpMacroAssembler* macro_assembler,
1634 TextElement elm, 1630 uc16 c,
1635 Vector<const uc16> quarks,
1636 Label* on_failure, 1631 Label* on_failure,
1637 int cp_offset, 1632 int cp_offset,
1638 bool check_offset) { 1633 bool check,
1634 bool preloaded) {
1639 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1635 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1640 // It is vital that this loop is backwards due to the unchecked character 1636 int length = uncanonicalize.get(c, '\0', chars);
1641 // load below. 1637 bool checked = false;
1642 for (int i = quarks.length() - 1; i >= 0; i--) { 1638 if (length <= 1) {
1643 uc16 c = quarks[i]; 1639 if (!preloaded) {
1644 int length = uncanonicalize.get(c, '\0', chars); 1640 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1645 if (length <= 1) { 1641 checked = check;
1646 if (check_offset && i == quarks.length() - 1) {
1647 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
1648 } else {
1649 // Here we don't need to check against the end of the input string
1650 // since this character lies before a character that matched.
1651 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
1652 }
1653 macro_assembler->CheckNotCharacter(c, on_failure);
1654 } 1642 }
1643 macro_assembler->CheckNotCharacter(c, on_failure);
1655 } 1644 }
1645 return checked;
1656 } 1646 }
1657 1647
1658 1648
1659 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, 1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1660 uc16 c1, 1650 uc16 c1,
1661 uc16 c2, 1651 uc16 c2,
1662 Label* on_failure) { 1652 Label* on_failure) {
1663 uc16 exor = c1 ^ c2; 1653 uc16 exor = c1 ^ c2;
1664 // Check whether exor has only one bit set. 1654 // Check whether exor has only one bit set.
1665 if (((exor - 1) & exor) == 0) { 1655 if (((exor - 1) & exor) == 0) {
1666 // If c1 and c2 differ only by one bit. 1656 // If c1 and c2 differ only by one bit.
1667 // Ecma262UnCanonicalize always gives the highest number last. 1657 // Ecma262UnCanonicalize always gives the highest number last.
1668 ASSERT(c2 > c1); 1658 ASSERT(c2 > c1);
1669 macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure); 1659 uc16 mask = String::kMaxUC16CharCode ^ exor;
1660 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1670 return true; 1661 return true;
1671 } 1662 }
1672 ASSERT(c2 > c1); 1663 ASSERT(c2 > c1);
1673 uc16 diff = c2 - c1; 1664 uc16 diff = c2 - c1;
1674 if (((diff - 1) & diff) == 0 && c1 >= diff) { 1665 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1675 // If the characters differ by 2^n but don't differ by one bit then 1666 // If the characters differ by 2^n but don't differ by one bit then
1676 // subtract the difference from the found character, then do the or 1667 // subtract the difference from the found character, then do the or
1677 // trick. We avoid the theoretical case where negative numbers are 1668 // trick. We avoid the theoretical case where negative numbers are
1678 // involved in order to simplify code generation. 1669 // involved in order to simplify code generation.
1679 macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff, 1670 uc16 mask = String::kMaxUC16CharCode ^ diff;
1680 diff, 1671 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1681 on_failure); 1672 diff,
1673 mask,
1674 on_failure);
1682 return true; 1675 return true;
1683 } 1676 }
1684 return false; 1677 return false;
1685 } 1678 }
1686 1679
1687 1680
1688 static inline void EmitAtomLetters( 1681 // Only emits letters (things that have case). Only used for case independent
1682 // matches.
1683 static inline bool EmitAtomLetter(
1689 RegExpMacroAssembler* macro_assembler, 1684 RegExpMacroAssembler* macro_assembler,
1690 TextElement elm, 1685 uc16 c,
1691 Vector<const uc16> quarks,
1692 Label* on_failure, 1686 Label* on_failure,
1693 int cp_offset, 1687 int cp_offset,
1694 bool check_offset) { 1688 bool check,
1689 bool preloaded) {
1695 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1690 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1696 // It is vital that this loop is backwards due to the unchecked character 1691 int length = uncanonicalize.get(c, '\0', chars);
1697 // load below. 1692 if (length <= 1) return false;
1698 for (int i = quarks.length() - 1; i >= 0; i--) { 1693 // We may not need to check against the end of the input string
1699 uc16 c = quarks[i]; 1694 // if this character lies before a character that matched.
1700 int length = uncanonicalize.get(c, '\0', chars); 1695 if (!preloaded) {
1701 if (length <= 1) continue; 1696 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1702 if (check_offset && i == quarks.length() - 1) { 1697 }
1703 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); 1698 Label ok;
1704 } else { 1699 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
1705 // Here we don't need to check against the end of the input string 1700 switch (length) {
1706 // since this character lies before a character that matched. 1701 case 2: {
1707 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); 1702 if (ShortCutEmitCharacterPair(macro_assembler,
1703 chars[0],
1704 chars[1],
1705 on_failure)) {
1706 } else {
1707 macro_assembler->CheckCharacter(chars[0], &ok);
1708 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1709 macro_assembler->Bind(&ok);
1710 }
1711 break;
1708 } 1712 }
1709 Label ok; 1713 case 4:
1710 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); 1714 macro_assembler->CheckCharacter(chars[3], &ok);
1711 switch (length) { 1715 // Fall through!
1712 case 2: { 1716 case 3:
1713 if (ShortCutEmitCharacterPair(macro_assembler, 1717 macro_assembler->CheckCharacter(chars[0], &ok);
1714 chars[0], 1718 macro_assembler->CheckCharacter(chars[1], &ok);
1715 chars[1], 1719 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1716 on_failure)) { 1720 macro_assembler->Bind(&ok);
1717 } else { 1721 break;
1718 macro_assembler->CheckCharacter(chars[0], &ok); 1722 default:
1719 macro_assembler->CheckNotCharacter(chars[1], on_failure); 1723 UNREACHABLE();
1720 macro_assembler->Bind(&ok); 1724 break;
1721 }
1722 break;
1723 }
1724 case 4:
1725 macro_assembler->CheckCharacter(chars[3], &ok);
1726 // Fall through!
1727 case 3:
1728 macro_assembler->CheckCharacter(chars[0], &ok);
1729 macro_assembler->CheckCharacter(chars[1], &ok);
1730 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1731 macro_assembler->Bind(&ok);
1732 break;
1733 default:
1734 UNREACHABLE();
1735 break;
1736 }
1737 } 1725 }
1726 return true;
1738 } 1727 }
1739 1728
1740 1729
1741 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, 1730 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
1742 RegExpCharacterClass* cc, 1731 RegExpCharacterClass* cc,
1743 int cp_offset, 1732 int cp_offset,
1744 Label* on_failure, 1733 Label* on_failure,
1745 bool check_offset, 1734 bool check_offset,
1746 bool ascii) { 1735 bool ascii,
1736 bool preloaded) {
1747 ZoneList<CharacterRange>* ranges = cc->ranges(); 1737 ZoneList<CharacterRange>* ranges = cc->ranges();
1748 int max_char; 1738 int max_char;
1749 if (ascii) { 1739 if (ascii) {
1750 max_char = String::kMaxAsciiCharCode; 1740 max_char = String::kMaxAsciiCharCode;
1751 } else { 1741 } else {
1752 max_char = String::kMaxUC16CharCode; 1742 max_char = String::kMaxUC16CharCode;
1753 } 1743 }
1754 1744
1755 Label success; 1745 Label success;
1756 1746
(...skipping 25 matching lines...) Expand all
1782 ranges->at(0).IsEverything(max_char)) { 1772 ranges->at(0).IsEverything(max_char)) {
1783 // This is a common case hit by non-anchored expressions. 1773 // This is a common case hit by non-anchored expressions.
1784 // TODO(erikcorry): We should have a macro assembler instruction that just 1774 // TODO(erikcorry): We should have a macro assembler instruction that just
1785 // checks for end of string without loading the character. 1775 // checks for end of string without loading the character.
1786 if (check_offset) { 1776 if (check_offset) {
1787 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1777 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
1788 } 1778 }
1789 return; 1779 return;
1790 } 1780 }
1791 1781
1792 if (check_offset) { 1782 if (!preloaded) {
1793 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1783 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1794 } else {
1795 // Here we don't need to check against the end of the input string
1796 // since this character lies before a character that matched.
1797 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
1798 } 1784 }
1799 1785
1800 for (int i = 0; i <= last_valid_range; i++) { 1786 for (int i = 0; i < last_valid_range; i++) {
1801 CharacterRange& range = ranges->at(i); 1787 CharacterRange& range = ranges->at(i);
1802 Label next_range; 1788 Label next_range;
1803 uc16 from = range.from(); 1789 uc16 from = range.from();
1804 uc16 to = range.to(); 1790 uc16 to = range.to();
1805 if (from > max_char) { 1791 if (from > max_char) {
1806 continue; 1792 continue;
1807 } 1793 }
1808 if (to > max_char) to = max_char; 1794 if (to > max_char) to = max_char;
1809 if (to == from) { 1795 if (to == from) {
1810 macro_assembler->CheckCharacter(to, char_is_in_class); 1796 macro_assembler->CheckCharacter(to, char_is_in_class);
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
1851 } else { 1837 } else {
1852 if (cc->is_negated()) { 1838 if (cc->is_negated()) {
1853 macro_assembler->GoTo(on_failure); 1839 macro_assembler->GoTo(on_failure);
1854 } 1840 }
1855 } 1841 }
1856 } 1842 }
1857 macro_assembler->Bind(&success); 1843 macro_assembler->Bind(&success);
1858 } 1844 }
1859 1845
1860 1846
1847 RegExpNode::~RegExpNode() {
1848 }
1849
1850
1861 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1851 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1862 GenerationVariant* variant) { 1852 GenerationVariant* variant) {
1863 // TODO(erikcorry): Implement support. 1853 // TODO(erikcorry): Implement support.
1864 if (info_.follows_word_interest || 1854 if (info_.follows_word_interest ||
1865 info_.follows_newline_interest || 1855 info_.follows_newline_interest ||
1866 info_.follows_start_interest) { 1856 info_.follows_start_interest) {
1867 return FAIL; 1857 return FAIL;
1868 } 1858 }
1869 1859
1870 // If we are generating a greedy loop then don't stop and don't reuse code. 1860 // If we are generating a greedy loop then don't stop and don't reuse code.
(...skipping 30 matching lines...) Expand all
1901 } 1891 }
1902 1892
1903 // If we get here there have been too many variants generated or recursion 1893 // If we get here there have been too many variants generated or recursion
1904 // is too deep. Time to switch to a generic version. The code for 1894 // is too deep. Time to switch to a generic version. The code for
1905 // generic versions above can handle deep recursion properly. 1895 // generic versions above can handle deep recursion properly.
1906 bool ok = variant->Flush(compiler, this); 1896 bool ok = variant->Flush(compiler, this);
1907 return ok ? DONE : FAIL; 1897 return ok ? DONE : FAIL;
1908 } 1898 }
1909 1899
1910 1900
1901 int ActionNode::EatsAtLeast(int recursion_depth) {
1902 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1903 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1904 return on_success()->EatsAtLeast(recursion_depth + 1);
1905 }
1906
1907
1908 int TextNode::EatsAtLeast(int recursion_depth) {
1909 int answer = Length();
1910 if (answer >= 4) return answer;
1911 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
1912 return answer + on_success()->EatsAtLeast(recursion_depth + 1);
1913 }
1914
1915
1916
1917 int ChoiceNode::EatsAtLeastHelper(int recursion_depth, int start_from_node) {
1918 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1919 int min = 100;
1920 int choice_count = alternatives_->length();
1921 for (int i = start_from_node; i < choice_count; i++) {
1922 RegExpNode* node = alternatives_->at(i).node();
1923 int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1);
1924 if (node_eats_at_least < min) min = node_eats_at_least;
1925 }
1926 return min;
1927 }
1928
1929
1930 int LoopChoiceNode::EatsAtLeast(int recursion_depth) {
1931 return 0;
1932 }
1933
1934
1935 int ChoiceNode::EatsAtLeast(int recursion_depth) {
1936 return EatsAtLeastHelper(recursion_depth, 0);
1937 }
1938
1939
1940 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
1941 static inline uint32_t SmearBitsRight(uint32_t v) {
1942 v |= v >> 1;
1943 v |= v >> 2;
1944 v |= v >> 4;
1945 v |= v >> 8;
1946 v |= v >> 16;
1947 return v;
1948 }
1949
1950
1951 bool QuickCheckDetails::Rationalize(bool asc) {
1952 bool found_useful_op = false;
1953 uint32_t char_mask;
1954 if (asc) {
1955 char_mask = String::kMaxAsciiCharCode;
1956 } else {
1957 char_mask = String::kMaxUC16CharCode;
1958 }
1959 mask_ = 0;
1960 value_ = 0;
1961 int char_shift = 0;
1962 for (int i = 0; i < characters_; i++) {
1963 Position* pos = &positions_[i];
1964 if ((pos->mask & String::kMaxAsciiCharCode) != 0) {
1965 found_useful_op = true;
1966 }
1967 mask_ |= (pos->mask & char_mask) << char_shift;
1968 value_ |= (pos->value & char_mask) << char_shift;
1969 char_shift += asc ? 8 : 16;
1970 }
1971 return found_useful_op;
1972 }
1973
1974
1975 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
1976 GenerationVariant* variant,
1977 bool preload_has_checked_bounds,
1978 Label* on_possible_success,
1979 QuickCheckDetails* details,
1980 bool fall_through_on_failure) {
1981 if (details->characters() == 0) return false;
1982 GetQuickCheckDetails(details, compiler, 0);
1983 if (!details->Rationalize(compiler->ascii())) return false;
1984 uint32_t mask = details->mask();
1985 uint32_t value = details->value();
1986
1987 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1988
1989 if (variant->characters_preloaded() != details->characters()) {
1990 assembler->LoadCurrentCharacter(variant->cp_offset(),
1991 variant->backtrack(),
1992 !preload_has_checked_bounds,
1993 details->characters());
1994 }
1995
1996
1997 bool need_mask = true;
1998
1999 if (details->characters() == 1) {
2000 // If number of characters preloaded is 1 then we used a byte or 16 bit
2001 // load so the value is already masked down.
2002 uint32_t char_mask;
2003 if (compiler->ascii()) {
2004 char_mask = String::kMaxAsciiCharCode;
2005 } else {
2006 char_mask = String::kMaxUC16CharCode;
2007 }
2008 if ((mask & char_mask) == char_mask) need_mask = false;
2009 } else {
2010 // For 2-character preloads in ASCII mode we also use a 16 bit load with
2011 // zero extend.
2012 if (details->characters() == 2 && compiler->ascii()) {
2013 if ((mask & 0xffff) == 0xffff) need_mask = false;
2014 } else {
2015 if (mask == 0xffffffff) need_mask = false;
2016 }
2017 }
2018
2019 if (fall_through_on_failure) {
2020 if (need_mask) {
2021 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2022 } else {
2023 assembler->CheckCharacter(value, on_possible_success);
2024 }
2025 } else {
2026 if (need_mask) {
2027 assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack());
2028 } else {
2029 assembler->CheckNotCharacter(value, variant->backtrack());
2030 }
2031 }
2032 return true;
2033 }
2034
2035
2036 // Here is the meat of GetQuickCheckDetails (see also the comment on the
2037 // super-class in the .h file).
2038 //
2039 // We iterate along the text object, building up for each character a
2040 // mask and value that can be used to test for a quick failure to match.
2041 // The masks and values for the positions will be combined into a single
2042 // machine word for the current character width in order to be used in
2043 // generating a quick check.
2044 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
2045 RegExpCompiler* compiler,
2046 int characters_filled_in) {
2047 ASSERT(characters_filled_in < details->characters());
2048 int characters = details->characters();
2049 int char_mask;
2050 int char_shift;
2051 if (compiler->ascii()) {
2052 char_mask = String::kMaxAsciiCharCode;
2053 char_shift = 8;
2054 } else {
2055 char_mask = String::kMaxUC16CharCode;
2056 char_shift = 16;
2057 }
2058 for (int k = 0; k < elms_->length(); k++) {
2059 TextElement elm = elms_->at(k);
2060 if (elm.type == TextElement::ATOM) {
2061 Vector<const uc16> quarks = elm.data.u_atom->data();
2062 for (int i = 0; i < characters && i < quarks.length(); i++) {
2063 QuickCheckDetails::Position* pos =
2064 details->positions(characters_filled_in);
2065 if (compiler->ignore_case()) {
2066 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2067 uc16 c = quarks[i];
2068 int length = uncanonicalize.get(c, '\0', chars);
2069 if (length < 2) {
2070 // This letter has no case equivalents, so it's nice and simple
2071 // and the mask-compare will determine definitely whether we have
2072 // a match at this character position.
2073 pos->mask = char_mask;
2074 pos->value = c;
2075 pos->determines_perfectly = true;
2076 } else {
2077 uint32_t common_bits = char_mask;
2078 uint32_t bits = chars[0];
2079 for (int j = 1; j < length; j++) {
2080 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2081 common_bits ^= differing_bits;
2082 bits &= common_bits;
2083 }
2084 // If length is 2 and common bits has only one zero in it then
2085 // our mask and compare instruction will determine definitely
2086 // whether we have a match at this character position. Otherwise
2087 // it can only be an approximate check.
2088 uint32_t one_zero = (common_bits | ~char_mask);
2089 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2090 pos->determines_perfectly = true;
2091 }
2092 pos->mask = common_bits;
2093 pos->value = bits;
2094 }
2095 } else {
2096 // Don't ignore case. Nice simple case where the mask-compare will
2097 // determine definitely whether we have a match at this character
2098 // position.
2099 pos->mask = char_mask;
2100 pos->value = quarks[i];
2101 pos->determines_perfectly = true;
2102 }
2103 characters_filled_in++;
2104 ASSERT(characters_filled_in <= details->characters());
2105 if (characters_filled_in == details->characters()) {
2106 return;
2107 }
2108 }
2109 } else {
2110 QuickCheckDetails::Position* pos =
2111 details->positions(characters_filled_in);
2112 RegExpCharacterClass* tree = elm.data.u_char_class;
2113 ZoneList<CharacterRange>* ranges = tree->ranges();
2114 CharacterRange range = ranges->at(0);
2115 if (tree->is_negated()) {
2116 // A quick check uses multi-character mask and compare. There is no
2117 // useful way to incorporate a negative char class into this scheme
2118 // so we just conservatively create a mask and value that will always
2119 // succeed.
2120 pos->mask = 0;
2121 pos->value = 0;
2122 } else {
2123 uint32_t differing_bits = (range.from() ^ range.to());
2124 // A mask and compare is only perfect if the differing bits form a
2125 // number like 00011111 with one single block of trailing 1s.
2126 if ((differing_bits & (differing_bits + 1)) == 0) {
2127 pos->determines_perfectly = true;
2128 }
2129 uint32_t common_bits = ~SmearBitsRight(differing_bits);
2130 uint32_t bits = (range.from() & common_bits);
2131 for (int i = 1; i < ranges->length(); i++) {
2132 // Here we are combining more ranges into the mask and compare
2133 // value. With each new range the mask becomes more sparse and
2134 // so the chances of a false positive rise. A character class
2135 // with multiple ranges is assumed never to be equivalent to a
2136 // mask and compare operation.
2137 pos->determines_perfectly = false;
2138 CharacterRange range = ranges->at(i);
2139 uint32_t new_common_bits = (range.from() ^ range.to());
2140 new_common_bits = ~SmearBitsRight(new_common_bits);
2141 common_bits &= new_common_bits;
2142 bits &= new_common_bits;
2143 uint32_t differing_bits = (range.from() & common_bits) ^ bits;
2144 common_bits ^= differing_bits;
2145 bits &= common_bits;
2146 }
2147 pos->mask = common_bits;
2148 pos->value = bits;
2149 }
2150 characters_filled_in++;
2151 ASSERT(characters_filled_in <= details->characters());
2152 if (characters_filled_in == details->characters()) {
2153 return;
2154 }
2155 }
2156 }
2157 ASSERT(characters_filled_in != details->characters());
2158 on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);
2159 }
2160
2161
2162 void QuickCheckDetails::Clear() {
2163 for (int i = 0; i < characters_; i++) {
2164 positions_[i].mask = 0;
2165 positions_[i].value = 0;
2166 positions_[i].determines_perfectly = false;
2167 }
2168 characters_ = 0;
2169 }
2170
2171
2172 void QuickCheckDetails::Advance(int by, bool ascii) {
2173 ASSERT(by > 0);
2174 if (by >= characters_) {
2175 Clear();
2176 return;
2177 }
2178 for (int i = 0; i < characters_ - by; i++) {
2179 positions_[i] = positions_[by + i];
2180 }
2181 for (int i = characters_ - by; i < characters_; i++) {
2182 positions_[i].mask = 0;
2183 positions_[i].value = 0;
2184 positions_[i].determines_perfectly = false;
2185 }
2186 characters_ -= by;
2187 // We could change mask_ and value_ here but we would never advance unless
2188 // they had already been used in a check and they won't be used again because
2189 // it would gain us nothing. So there's no point.
2190 }
2191
2192
2193 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
2194 ASSERT(characters_ == other->characters_);
2195 for (int i = from_index; i < characters_; i++) {
2196 QuickCheckDetails::Position* pos = positions(i);
2197 QuickCheckDetails::Position* other_pos = other->positions(i);
2198 if (pos->mask != other_pos->mask ||
2199 pos->value != other_pos->value ||
2200 !other_pos->determines_perfectly) {
2201 // Our mask-compare operation will be approximate unless we have the
2202 // exact same operation on both sides of the alternation.
2203 pos->determines_perfectly = false;
2204 }
2205 pos->mask &= other_pos->mask;
2206 pos->value &= pos->mask;
2207 other_pos->value &= pos->mask;
2208 uc16 differing_bits = (pos->value ^ other_pos->value);
2209 pos->mask &= ~differing_bits;
2210 pos->value &= pos->mask;
2211 }
2212 }
2213
2214
2215 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2216 RegExpCompiler* compiler,
2217 int characters_filled_in) {
2218 int choice_count = alternatives_->length();
2219 ASSERT(choice_count > 0);
2220 alternatives_->at(0).node()->GetQuickCheckDetails(details,
2221 compiler,
2222 characters_filled_in);
2223 for (int i = 1; i < choice_count; i++) {
2224 QuickCheckDetails new_details(details->characters());
2225 RegExpNode* node = alternatives_->at(i).node();
2226 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
2227 // Here we merge the quick match details of the two branches.
2228 details->Merge(&new_details, characters_filled_in);
2229 }
2230 }
2231
2232
2233 // We call this repeatedly to generate code for each pass over the text node.
2234 // The passes are in increasing order of difficulty because we hope one
2235 // of the first passes will fail in which case we are saved the work of the
2236 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
2237 // we will check the '%' in the first pass, the case independent 'a' in the
2238 // second pass and the character class in the last pass.
2239 //
2240 // The passes are done from right to left, so for example to test for /bar/
2241 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
2242 // and then a 'b' with offset 0. This means we can avoid the end-of-input
2243 // bounds check most of the time. In the example we only need to check for
2244 // end-of-input when loading the putative 'r'.
2245 //
2246 // A slight complication involves the fact that the first character may already
2247 // be fetched into a register by the previous node. In this case we want to
2248 // do the test for that character first. We do this in separate passes. The
2249 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a
2250 // pass has been performed then subsequent passes will have true in
2251 // first_element_checked to indicate that that character does not need to be
2252 // checked again.
2253 //
2254 // In addition to all this we are passed a GenerationVariant, which can
2255 // contain an AlternativeGeneration object. In this AlternativeGeneration
2256 // object we can see details of any quick check that was already passed in
2257 // order to get to the code we are now generating. The quick check can involve
2258 // loading characters, which means we do not need to recheck the bounds
2259 // up to the limit the quick check already checked. In addition the quick
2260 // check can have involved a mask and compare operation which may simplify
2261 // or obviate the need for further checks at some character positions.
2262 void TextNode::TextEmitPass(RegExpCompiler* compiler,
2263 TextEmitPassType pass,
2264 bool preloaded,
2265 GenerationVariant* variant,
2266 bool first_element_checked,
2267 int* checked_up_to) {
2268 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2269 bool ascii = compiler->ascii();
2270 Label* backtrack = variant->backtrack();
2271 QuickCheckDetails* quick_check = variant->quick_check_performed();
2272 int element_count = elms_->length();
2273 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
2274 TextElement elm = elms_->at(i);
2275 int cp_offset = variant->cp_offset() + elm.cp_offset;
2276 if (elm.type == TextElement::ATOM) {
2277 if (pass == NON_ASCII_MATCH ||
2278 pass == CHARACTER_MATCH ||
2279 pass == CASE_CHARACTER_MATCH) {
2280 Vector<const uc16> quarks = elm.data.u_atom->data();
2281 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
2282 bool bound_checked = true; // Most ops will check their bounds.
2283 if (first_element_checked && i == 0 && j == 0) continue;
2284 if (quick_check != NULL &&
2285 elm.cp_offset + j < quick_check->characters() &&
2286 quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
2287 continue;
2288 }
2289 if (pass == NON_ASCII_MATCH) {
2290 ASSERT(ascii);
2291 if (quarks[j] > String::kMaxAsciiCharCode) {
2292 assembler->GoTo(backtrack);
2293 return;
2294 }
2295 } else if (pass == CHARACTER_MATCH) {
2296 if (compiler->ignore_case()) {
2297 bound_checked = EmitAtomNonLetter(assembler,
2298 quarks[j],
2299 backtrack,
2300 cp_offset + j,
2301 *checked_up_to < cp_offset + j,
2302 preloaded);
2303 } else {
2304 if (!preloaded) {
2305 assembler->LoadCurrentCharacter(cp_offset + j,
2306 backtrack,
2307 *checked_up_to < cp_offset + j);
2308 }
2309 assembler->CheckNotCharacter(quarks[j], backtrack);
2310 }
2311 } else {
2312 ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
2313 ASSERT(compiler->ignore_case());
2314 bound_checked = EmitAtomLetter(assembler,
2315 quarks[j],
2316 backtrack,
2317 cp_offset + j,
2318 *checked_up_to < cp_offset + j,
2319 preloaded);
2320 }
2321 if (pass != NON_ASCII_MATCH && bound_checked) {
2322 if (cp_offset + j > *checked_up_to) {
2323 *checked_up_to = cp_offset + j;
2324 }
2325 }
2326 }
2327 }
2328 } else {
2329 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
2330 if (first_element_checked && i == 0) continue;
2331 if (quick_check != NULL &&
2332 elm.cp_offset < quick_check->characters() &&
2333 quick_check->positions(elm.cp_offset)->determines_perfectly) {
2334 continue;
2335 }
2336 if (pass == CHARACTER_CLASS_MATCH) {
2337 RegExpCharacterClass* cc = elm.data.u_char_class;
2338 EmitCharClass(assembler,
2339 cc,
2340 cp_offset,
2341 backtrack,
2342 *checked_up_to < cp_offset,
2343 ascii,
2344 preloaded);
2345 if (cp_offset > *checked_up_to) {
2346 *checked_up_to = cp_offset;
2347 }
2348 }
2349 }
2350 }
2351 }
2352
2353
2354 int TextNode::Length() {
2355 TextElement elm = elms_->last();
2356 ASSERT(elm.cp_offset >= 0);
2357 if (elm.type == TextElement::ATOM) {
2358 return elm.cp_offset + elm.data.u_atom->data().length();
2359 } else {
2360 return elm.cp_offset + 1;
2361 }
2362 }
2363
2364
1911 // This generates the code to match a text node. A text node can contain 2365 // This generates the code to match a text node. A text node can contain
1912 // straight character sequences (possibly to be matched in a case-independent 2366 // straight character sequences (possibly to be matched in a case-independent
1913 // way) and character classes. In order to be most efficient we test for the 2367 // way) and character classes. For efficiency we do not do this in a single
1914 // simple things first and then move on to the more complicated things. The 2368 // pass from left to right. Instead we pass over the text node several times,
1915 // simplest thing is a non-letter or a letter if we are matching case. The 2369 // emitting code for some character positions every time. See the comment on
1916 // next-most simple thing is a case-independent letter. The least simple is 2370 // TextEmitPass for details.
1917 // a character class. Another optimization is that we test the last one first.
1918 // If that succeeds we don't need to test for the end of the string when we
1919 // load other characters.
1920 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { 2371 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
1921 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1922 Label *backtrack = variant->backtrack();
1923 LimitResult limit_result = LimitVersions(compiler, variant); 2372 LimitResult limit_result = LimitVersions(compiler, variant);
1924 if (limit_result == FAIL) return false; 2373 if (limit_result == FAIL) return false;
1925 if (limit_result == DONE) return true; 2374 if (limit_result == DONE) return true;
1926 ASSERT(limit_result == CONTINUE); 2375 ASSERT(limit_result == CONTINUE);
1927 2376
1928 int element_count = elms_->length(); 2377 if (info()->follows_word_interest ||
1929 ASSERT(element_count != 0); 2378 info()->follows_newline_interest ||
2379 info()->follows_start_interest) {
2380 return false;
2381 }
2382
1930 if (info()->at_end) { 2383 if (info()->at_end) {
1931 macro_assembler->GoTo(backtrack); 2384 compiler->macro_assembler()->GoTo(variant->backtrack());
1932 return true; 2385 return true;
1933 } 2386 }
1934 // First check for non-ASCII text. 2387
1935 // TODO(plesner): We should do this at node level.
1936 if (compiler->ascii()) { 2388 if (compiler->ascii()) {
1937 for (int i = element_count - 1; i >= 0; i--) { 2389 int dummy = 0;
1938 TextElement elm = elms_->at(i); 2390 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
1939 if (elm.type == TextElement::ATOM) { 2391 }
1940 Vector<const uc16> quarks = elm.data.u_atom->data(); 2392
1941 for (int j = quarks.length() - 1; j >= 0; j--) { 2393 bool first_elt_done = false;
1942 if (quarks[j] > String::kMaxAsciiCharCode) { 2394 int bound_checked_to = variant->cp_offset() - 1;
1943 macro_assembler->GoTo(backtrack); 2395 QuickCheckDetails* quick_check = variant->quick_check_performed();
1944 return true; 2396 if (quick_check != NULL) {
1945 } 2397 bound_checked_to += quick_check->characters();
1946 } 2398 }
1947 } else { 2399
1948 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2400 // If a character is preloaded into the current character register then
1949 } 2401 // check that now.
1950 } 2402 if (variant->characters_preloaded() == 1) {
1951 } 2403 TextEmitPass(compiler,
1952 // Second, handle straight character matches. 2404 CHARACTER_MATCH,
1953 int checked_up_to = -1; 2405 true,
1954 for (int i = element_count - 1; i >= 0; i--) { 2406 variant,
1955 TextElement elm = elms_->at(i); 2407 false,
1956 ASSERT(elm.cp_offset >= 0); 2408 &bound_checked_to);
1957 int cp_offset = variant->cp_offset() + elm.cp_offset; 2409 if (compiler->ignore_case()) {
1958 if (elm.type == TextElement::ATOM) { 2410 TextEmitPass(compiler,
1959 Vector<const uc16> quarks = elm.data.u_atom->data(); 2411 CASE_CHARACTER_MATCH,
1960 int last_cp_offset = cp_offset + quarks.length(); 2412 true,
1961 if (compiler->ignore_case()) { 2413 variant,
1962 EmitAtomNonLetters(macro_assembler, 2414 false,
1963 elm, 2415 &bound_checked_to);
1964 quarks, 2416 }
1965 backtrack, 2417 TextEmitPass(compiler,
1966 cp_offset, 2418 CHARACTER_CLASS_MATCH,
1967 checked_up_to < last_cp_offset); 2419 true,
1968 } else { 2420 variant,
1969 macro_assembler->CheckCharacters(quarks, 2421 false,
1970 cp_offset, 2422 &bound_checked_to);
1971 backtrack, 2423 first_elt_done = true;
1972 checked_up_to < last_cp_offset); 2424 }
1973 } 2425
1974 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; 2426 TextEmitPass(compiler,
1975 } else { 2427 CHARACTER_MATCH,
1976 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); 2428 false,
1977 } 2429 variant,
1978 } 2430 first_elt_done,
1979 // Third, handle case independent letter matches if any. 2431 &bound_checked_to);
1980 if (compiler->ignore_case()) { 2432 if (compiler->ignore_case()) {
1981 for (int i = element_count - 1; i >= 0; i--) { 2433 TextEmitPass(compiler,
1982 TextElement elm = elms_->at(i); 2434 CASE_CHARACTER_MATCH,
1983 int cp_offset = variant->cp_offset() + elm.cp_offset; 2435 false,
1984 if (elm.type == TextElement::ATOM) { 2436 variant,
1985 Vector<const uc16> quarks = elm.data.u_atom->data(); 2437 first_elt_done,
1986 int last_cp_offset = cp_offset + quarks.length(); 2438 &bound_checked_to);
1987 EmitAtomLetters(macro_assembler, 2439 }
1988 elm, 2440 TextEmitPass(compiler,
1989 quarks, 2441 CHARACTER_CLASS_MATCH,
1990 backtrack, 2442 false,
1991 cp_offset, 2443 variant,
1992 checked_up_to < last_cp_offset); 2444 first_elt_done,
1993 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; 2445 &bound_checked_to);
1994 } 2446
1995 } 2447 GenerationVariant successor_variant(*variant);
1996 } 2448 successor_variant.AdvanceVariant(Length(), compiler->ascii());
1997 // If the fast character matches passed then do the character classes.
1998 for (int i = element_count - 1; i >= 0; i--) {
1999 TextElement elm = elms_->at(i);
2000 int cp_offset = variant->cp_offset() + elm.cp_offset;
2001 if (elm.type == TextElement::CHAR_CLASS) {
2002 RegExpCharacterClass* cc = elm.data.u_char_class;
2003 EmitCharClass(macro_assembler,
2004 cc,
2005 cp_offset,
2006 backtrack,
2007 checked_up_to < cp_offset,
2008 compiler->ascii());
2009 if (cp_offset > checked_up_to) checked_up_to = cp_offset;
2010 }
2011 }
2012
2013 GenerationVariant new_variant(*variant);
2014 new_variant.set_cp_offset(checked_up_to + 1);
2015 RecursionCheck rc(compiler); 2449 RecursionCheck rc(compiler);
2016 return on_success()->Emit(compiler, &new_variant); 2450 return on_success()->Emit(compiler, &successor_variant);
2451 }
2452
2453
2454 void GenerationVariant::AdvanceVariant(int by, bool ascii) {
2455 ASSERT(by > 0);
2456 // We don't have an instruction for shifting the current character register
2457 // down or for using a shifted value for anything so lets just forget that
2458 // we preloaded any characters into it.
2459 characters_preloaded_ = 0;
2460 // Adjust the offsets of the quick check performed information. This
2461 // information is used to find out what we already determined about the
2462 // characters by means of mask and compare.
2463 quick_check_performed_.Advance(by, ascii);
2464 cp_offset_ += by;
2017 } 2465 }
2018 2466
2019 2467
2020 void TextNode::MakeCaseIndependent() { 2468 void TextNode::MakeCaseIndependent() {
2021 int element_count = elms_->length(); 2469 int element_count = elms_->length();
2022 for (int i = 0; i < element_count; i++) { 2470 for (int i = 0; i < element_count; i++) {
2023 TextElement elm = elms_->at(i); 2471 TextElement elm = elms_->at(i);
2024 if (elm.type == TextElement::CHAR_CLASS) { 2472 if (elm.type == TextElement::CHAR_CLASS) {
2025 RegExpCharacterClass* cc = elm.data.u_char_class; 2473 RegExpCharacterClass* cc = elm.data.u_char_class;
2026 ZoneList<CharacterRange>* ranges = cc->ranges(); 2474 ZoneList<CharacterRange>* ranges = cc->ranges();
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
2103 return true; 2551 return true;
2104 } 2552 }
2105 ASSERT(variant->stop_node() == NULL); 2553 ASSERT(variant->stop_node() == NULL);
2106 if (!variant->is_trivial()) { 2554 if (!variant->is_trivial()) {
2107 return variant->Flush(compiler, this); 2555 return variant->Flush(compiler, this);
2108 } 2556 }
2109 return ChoiceNode::Emit(compiler, variant); 2557 return ChoiceNode::Emit(compiler, variant);
2110 } 2558 }
2111 2559
2112 2560
2561 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
2562 int start_from_node) {
2563 bool ascii = compiler->ascii();
2564 int preload_characters = ChoiceNode::EatsAtLeastHelper(0, start_from_node);
2565 #ifdef CAN_READ_UNALIGNED
2566 if (ascii) {
2567 if (preload_characters > 4) preload_characters = 4;
2568 // We can't preload 3 characters because there is no machine instruction
2569 // to do that. We can't just load 4 because we could be reading
2570 // beyond the end of the string, which could cause a memory fault.
2571 if (preload_characters == 3) preload_characters = 2;
2572 } else {
2573 if (preload_characters > 2) preload_characters = 2;
2574 }
2575 #else
2576 if (preload_characters > 1) preload_characters = 1;
2577 #endif
2578 return preload_characters;
2579 }
2580
2581
2582 // This class is used when generating the alternatives in a choice node. It
2583 // records the way the alternative is being code generated.
2584 class AlternativeGeneration: public Malloced {
2585 public:
2586 AlternativeGeneration()
2587 : possible_success(),
2588 expects_preload(false),
2589 after(),
2590 quick_check_details() { }
2591 Label possible_success;
2592 bool expects_preload;
2593 Label after;
2594 QuickCheckDetails quick_check_details;
2595 };
2596
2597
2598 // Creates a list of AlternativeGenerations. If the list has a reasonable
2599 // size then it is on the stack, otherwise the excess is on the heap.
2600 class AlternativeGenerationList {
2601 public:
2602 explicit AlternativeGenerationList(int count)
2603 : alt_gens_(count) {
2604 for (int i = 0; i < count && i < kAFew; i++) {
2605 alt_gens_.Add(a_few_alt_gens_ + i);
2606 }
2607 for (int i = kAFew; i < count; i++) {
2608 alt_gens_.Add(new AlternativeGeneration());
2609 }
2610 }
2611 ~AlternativeGenerationList() {
2612 for (int i = 0; i < alt_gens_.length(); i++) {
2613 alt_gens_[i]->possible_success.Unuse();
2614 alt_gens_[i]->after.Unuse();
2615 }
2616 for (int i = kAFew; i < alt_gens_.length(); i++) {
2617 delete alt_gens_[i];
2618 alt_gens_[i] = NULL;
2619 }
2620 }
2621
2622 AlternativeGeneration* at(int i) {
2623 return alt_gens_[i];
2624 }
2625 private:
2626 static const int kAFew = 10;
2627 ZoneList<AlternativeGeneration*> alt_gens_;
2628 AlternativeGeneration a_few_alt_gens_[kAFew];
2629 };
2630
2631
2632 /* Code generation for choice nodes.
2633 *
2634 * We generate quick checks that do a mask and compare to eliminate a
2635 * choice. If the quick check succeeds then it jumps to the continuation to
2636 * do slow checks and check subsequent nodes. If it fails (the common case)
2637 * it falls through to the next choice.
2638 *
2639 * Here is the desired flow graph. Nodes directly below each other imply
2640 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
2641 * 3 doesn't have a quick check so we have to call the slow check.
2642 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
2643 * regexp continuation is generated directly after the Sn node, up to the
2644 * next GoTo if we decide to reuse some already generated code. Some
2645 * nodes expect preload_characters to be preloaded into the current
2646 * character register. R nodes do this preloading. Vertices are marked
2647 * F for failures and S for success (possible success in the case of quick
2648 * nodes). L, V, < and > are used as arrow heads.
2649 *
2650 * ----------> R
2651 * |
2652 * V
2653 * Q1 -----> S1
2654 * | S /
2655 * F| /
2656 * | F/
2657 * | /
2658 * | R
2659 * | /
2660 * V L
2661 * Q2 -----> S2
2662 * | S /
2663 * F| /
2664 * | F/
2665 * | /
2666 * | R
2667 * | /
2668 * V L
2669 * S3
2670 * |
2671 * F|
2672 * |
2673 * R
2674 * |
2675 * backtrack V
2676 * <----------Q4
2677 * \ F |
2678 * \ |S
2679 * \ F V
2680 * \-----S4
2681 *
2682 * For greedy loops we reverse our expectation and expect to match rather
2683 * than fail. Therefore we want the loop code to look like this (U is the
2684 * unwind code that steps back in the greedy loop). The following alternatives
2685 * look the same as above.
2686 * _____
2687 * / \
2688 * V |
2689 * ----------> S1 |
2690 * /| |
2691 * / |S |
2692 * F/ \_____/
2693 * /
2694 * |<-----------
2695 * | \
2696 * V \
2697 * Q2 ---> S2 \
2698 * | S / |
2699 * F| / |
2700 * | F/ |
2701 * | / |
2702 * | R |
2703 * | / |
2704 * F VL |
2705 * <------U |
2706 * back |S |
2707 * \______________/
2708 */
2709
2710
2113 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { 2711 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
2114 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2712 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2115 int choice_count = alternatives_->length(); 2713 int choice_count = alternatives_->length();
2116 #ifdef DEBUG 2714 #ifdef DEBUG
2117 for (int i = 0; i < choice_count - 1; i++) { 2715 for (int i = 0; i < choice_count - 1; i++) {
2118 GuardedAlternative alternative = alternatives_->at(i); 2716 GuardedAlternative alternative = alternatives_->at(i);
2119 ZoneList<Guard*>* guards = alternative.guards(); 2717 ZoneList<Guard*>* guards = alternative.guards();
2120 int guard_count = (guards == NULL) ? 0 : guards->length(); 2718 int guard_count = (guards == NULL) ? 0 : guards->length();
2121 for (int j = 0; j < guard_count; j++) { 2719 for (int j = 0; j < guard_count; j++) {
2122 ASSERT(!variant->mentions_reg(guards->at(j)->reg())); 2720 ASSERT(!variant->mentions_reg(guards->at(j)->reg()));
2123 } 2721 }
2124 } 2722 }
2125 #endif 2723 #endif
2126 2724
2127 LimitResult limit_result = LimitVersions(compiler, variant); 2725 LimitResult limit_result = LimitVersions(compiler, variant);
2128 if (limit_result == DONE) return true; 2726 if (limit_result == DONE) return true;
2129 if (limit_result == FAIL) return false; 2727 if (limit_result == FAIL) return false;
2130 ASSERT(limit_result == CONTINUE); 2728 ASSERT(limit_result == CONTINUE);
2131 2729
2132 RecursionCheck rc(compiler); 2730 RecursionCheck rc(compiler);
2133 2731
2134 GenerationVariant* current_variant = variant; 2732 GenerationVariant* current_variant = variant;
2135 2733
2136 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); 2734 int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
2137 bool greedy_loop = false; 2735 bool greedy_loop = false;
2138 Label greedy_loop_label; 2736 Label greedy_loop_label;
2139 GenerationVariant counter_backtrack_variant(&greedy_loop_label); 2737 GenerationVariant counter_backtrack_variant;
2738 counter_backtrack_variant.set_backtrack(&greedy_loop_label);
2140 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 2739 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
2141 // Here we have special handling for greedy loops containing only text nodes 2740 // Here we have special handling for greedy loops containing only text nodes
2142 // and other simple nodes. These are handled by pushing the current 2741 // and other simple nodes. These are handled by pushing the current
2143 // position on the stack and then incrementing the current position each 2742 // position on the stack and then incrementing the current position each
2144 // time around the switch. On backtrack we decrement the current position 2743 // time around the switch. On backtrack we decrement the current position
2145 // and check it against the pushed value. This avoids pushing backtrack 2744 // and check it against the pushed value. This avoids pushing backtrack
2146 // information for each iteration of the loop, which could take up a lot of 2745 // information for each iteration of the loop, which could take up a lot of
2147 // space. 2746 // space.
2148 greedy_loop = true; 2747 greedy_loop = true;
2149 ASSERT(variant->stop_node() == NULL); 2748 ASSERT(variant->stop_node() == NULL);
2150 macro_assembler->PushCurrentPosition(); 2749 macro_assembler->PushCurrentPosition();
2151 current_variant = &counter_backtrack_variant; 2750 current_variant = &counter_backtrack_variant;
2152 Label greedy_match_failed; 2751 Label greedy_match_failed;
2153 GenerationVariant greedy_match_variant(&greedy_match_failed); 2752 GenerationVariant greedy_match_variant;
2753 greedy_match_variant.set_backtrack(&greedy_match_failed);
2154 Label loop_label; 2754 Label loop_label;
2155 macro_assembler->Bind(&loop_label); 2755 macro_assembler->Bind(&loop_label);
2156 greedy_match_variant.set_stop_node(this); 2756 greedy_match_variant.set_stop_node(this);
2157 greedy_match_variant.set_loop_label(&loop_label); 2757 greedy_match_variant.set_loop_label(&loop_label);
2158 bool ok = alternatives_->at(0).node()->Emit(compiler, 2758 bool ok = alternatives_->at(0).node()->Emit(compiler,
2159 &greedy_match_variant); 2759 &greedy_match_variant);
2160 macro_assembler->Bind(&greedy_match_failed); 2760 macro_assembler->Bind(&greedy_match_failed);
2161 if (!ok) { 2761 if (!ok) {
2162 greedy_loop_label.Unuse(); 2762 greedy_loop_label.Unuse();
2163 return false; 2763 return false;
2164 } 2764 }
2165 } 2765 }
2166 2766
2167 Label second_choice; // For use in greedy matches. 2767 Label second_choice; // For use in greedy matches.
2168 macro_assembler->Bind(&second_choice); 2768 macro_assembler->Bind(&second_choice);
2169 2769
2770 int first_normal_choice = greedy_loop ? 1 : 0;
2771
2772 int preload_characters = CalculatePreloadCharacters(compiler,
2773 first_normal_choice);
2774 bool preload_is_current = false;
2775 bool preload_has_checked_bounds = false;
2776
2777 AlternativeGenerationList alt_gens(choice_count);
2778
2170 // For now we just call all choices one after the other. The idea ultimately 2779 // For now we just call all choices one after the other. The idea ultimately
2171 // is to use the Dispatch table to try only the relevant ones. 2780 // is to use the Dispatch table to try only the relevant ones.
2172 for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) { 2781 for (int i = first_normal_choice; i < choice_count; i++) {
2173 GuardedAlternative alternative = alternatives_->at(i); 2782 GuardedAlternative alternative = alternatives_->at(i);
2174 Label after; 2783 AlternativeGeneration* alt_gen(alt_gens.at(i));
2784 alt_gen->quick_check_details.set_characters(preload_characters);
2175 ZoneList<Guard*>* guards = alternative.guards(); 2785 ZoneList<Guard*>* guards = alternative.guards();
2176 int guard_count = (guards == NULL) ? 0 : guards->length(); 2786 int guard_count = (guards == NULL) ? 0 : guards->length();
2787
2177 GenerationVariant new_variant(*current_variant); 2788 GenerationVariant new_variant(*current_variant);
2178 new_variant.set_backtrack(&after); 2789 new_variant.set_characters_preloaded(preload_is_current ?
2179 for (int j = 0; j < guard_count; j++) { 2790 preload_characters :
2180 GenerateGuard(macro_assembler, guards->at(j), &new_variant); 2791 0);
2792 new_variant.quick_check_performed()->Clear();
2793 alt_gen->expects_preload = preload_is_current;
2794 bool generate_full_check_inline = false;
2795 if (alternative.node()->EmitQuickCheck(compiler,
2796 &new_variant,
2797 preload_has_checked_bounds,
2798 &alt_gen->possible_success,
2799 &alt_gen->quick_check_details,
2800 i < choice_count - 1)) {
2801 // Quick check was generated for this choice.
2802 preload_is_current = true;
2803 preload_has_checked_bounds = true;
2804 // On the last choice in the ChoiceNode we generated the quick
2805 // check to fall through on possible success. So now we need to
2806 // generate the full check inline.
2807 if (i == choice_count - 1) {
2808 macro_assembler->Bind(&alt_gen->possible_success);
2809 new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
2810 generate_full_check_inline = true;
2811 }
2812 } else {
2813 // No quick check was generated. Put the full code here.
2814 if (i < choice_count - 1) {
2815 new_variant.set_backtrack(&alt_gen->after);
2816 }
2817 generate_full_check_inline = true;
2181 } 2818 }
2182 if (!alternative.node()->Emit(compiler, &new_variant)) { 2819 if (generate_full_check_inline) {
2183 after.Unuse(); 2820 if (preload_is_current) {
2184 return false; 2821 new_variant.set_characters_preloaded(preload_characters);
2822 }
2823 for (int j = 0; j < guard_count; j++) {
2824 GenerateGuard(macro_assembler, guards->at(j), &new_variant);
2825 }
2826 if (!alternative.node()->Emit(compiler, &new_variant)) {
2827 greedy_loop_label.Unuse();
2828 return false;
2829 }
2830 preload_is_current = false;
2185 } 2831 }
2186 macro_assembler->Bind(&after); 2832 macro_assembler->Bind(&alt_gen->after);
2187 } 2833 }
2188 GuardedAlternative alternative = alternatives_->at(choice_count - 1);
2189 ZoneList<Guard*>* guards = alternative.guards();
2190 int guard_count = (guards == NULL) ? 0 : guards->length();
2191 for (int j = 0; j < guard_count; j++) {
2192 GenerateGuard(macro_assembler, guards->at(j), current_variant);
2193 }
2194 bool ok = alternative.node()->Emit(compiler, current_variant);
2195 if (!ok) return false;
2196 if (greedy_loop) { 2834 if (greedy_loop) {
2197 macro_assembler->Bind(&greedy_loop_label); 2835 macro_assembler->Bind(&greedy_loop_label);
2198 // If we have unwound to the bottom then backtrack. 2836 // If we have unwound to the bottom then backtrack.
2199 macro_assembler->CheckGreedyLoop(variant->backtrack()); 2837 macro_assembler->CheckGreedyLoop(variant->backtrack());
2200 // Otherwise try the second priority at an earlier position. 2838 // Otherwise try the second priority at an earlier position.
2201 macro_assembler->AdvanceCurrentPosition(-text_length); 2839 macro_assembler->AdvanceCurrentPosition(-text_length);
2202 macro_assembler->GoTo(&second_choice); 2840 macro_assembler->GoTo(&second_choice);
2203 } 2841 }
2842 // At this point we need to generate slow checks for the alternatives where
2843 // the quick check was inlined. We can recognize these because the associated
2844 // label was bound.
2845 for (int i = first_normal_choice; i < choice_count - 1; i++) {
2846 AlternativeGeneration* alt_gen = alt_gens.at(i);
2847 if (!EmitOutOfLineContinuation(compiler,
2848 current_variant,
2849 alternatives_->at(i),
2850 alt_gen,
2851 preload_characters,
2852 alt_gens.at(i + 1)->expects_preload)) {
2853 return false;
2854 }
2855 }
2204 return true; 2856 return true;
2205 } 2857 }
2206 2858
2207 2859
2860 bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
2861 GenerationVariant* variant,
2862 GuardedAlternative alternative,
2863 AlternativeGeneration* alt_gen,
2864 int preload_characters,
2865 bool next_expects_preload) {
2866 if (!alt_gen->possible_success.is_linked()) return true;
2867
2868 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2869 macro_assembler->Bind(&alt_gen->possible_success);
2870 GenerationVariant out_of_line_variant(*variant);
2871 out_of_line_variant.set_characters_preloaded(preload_characters);
2872 out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details);
2873 ZoneList<Guard*>* guards = alternative.guards();
2874 int guard_count = (guards == NULL) ? 0 : guards->length();
2875 if (next_expects_preload) {
2876 Label reload_current_char;
2877 out_of_line_variant.set_backtrack(&reload_current_char);
2878 for (int j = 0; j < guard_count; j++) {
2879 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
2880 }
2881 bool ok = alternative.node()->Emit(compiler, &out_of_line_variant);
2882 macro_assembler->Bind(&reload_current_char);
2883 // Reload the current character, since the next quick check expects that.
2884 // We don't need to check bounds here because we only get into this
2885 // code through a quick check which already did the checked load.
2886 macro_assembler->LoadCurrentCharacter(variant->cp_offset(),
2887 NULL,
2888 false,
2889 preload_characters);
2890 macro_assembler->GoTo(&(alt_gen->after));
2891 return ok;
2892 } else {
2893 out_of_line_variant.set_backtrack(&(alt_gen->after));
2894 for (int j = 0; j < guard_count; j++) {
2895 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
2896 }
2897 return alternative.node()->Emit(compiler, &out_of_line_variant);
2898 }
2899 }
2900
2901
2208 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { 2902 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
2209 RegExpMacroAssembler* macro = compiler->macro_assembler(); 2903 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2210 LimitResult limit_result = LimitVersions(compiler, variant); 2904 LimitResult limit_result = LimitVersions(compiler, variant);
2211 if (limit_result == DONE) return true; 2905 if (limit_result == DONE) return true;
2212 if (limit_result == FAIL) return false; 2906 if (limit_result == FAIL) return false;
2213 ASSERT(limit_result == CONTINUE); 2907 ASSERT(limit_result == CONTINUE);
2214 2908
2215 RecursionCheck rc(compiler); 2909 RecursionCheck rc(compiler);
2216 2910
2217 switch (type_) { 2911 switch (type_) {
2218 case STORE_POSITION: { 2912 case STORE_POSITION: {
2219 GenerationVariant::DeferredCapture 2913 GenerationVariant::DeferredCapture
(...skipping 11 matching lines...) Expand all
2231 } 2925 }
2232 case SET_REGISTER: { 2926 case SET_REGISTER: {
2233 GenerationVariant::DeferredSetRegister 2927 GenerationVariant::DeferredSetRegister
2234 new_set(data_.u_store_register.reg, data_.u_store_register.value); 2928 new_set(data_.u_store_register.reg, data_.u_store_register.value);
2235 GenerationVariant new_variant = *variant; 2929 GenerationVariant new_variant = *variant;
2236 new_variant.add_action(&new_set); 2930 new_variant.add_action(&new_set);
2237 return on_success()->Emit(compiler, &new_variant); 2931 return on_success()->Emit(compiler, &new_variant);
2238 } 2932 }
2239 case BEGIN_SUBMATCH: 2933 case BEGIN_SUBMATCH:
2240 if (!variant->is_trivial()) return variant->Flush(compiler, this); 2934 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2241 macro->WriteCurrentPositionToRegister( 2935 assembler->WriteCurrentPositionToRegister(
2242 data_.u_submatch.current_position_register, 0); 2936 data_.u_submatch.current_position_register, 0);
2243 macro->WriteStackPointerToRegister( 2937 assembler->WriteStackPointerToRegister(
2244 data_.u_submatch.stack_pointer_register); 2938 data_.u_submatch.stack_pointer_register);
2245 return on_success()->Emit(compiler, variant); 2939 return on_success()->Emit(compiler, variant);
2246 case POSITIVE_SUBMATCH_SUCCESS: 2940 case POSITIVE_SUBMATCH_SUCCESS:
2247 if (!variant->is_trivial()) return variant->Flush(compiler, this); 2941 if (!variant->is_trivial()) return variant->Flush(compiler, this);
2248 // TODO(erikcorry): Implement support. 2942 // TODO(erikcorry): Implement support.
2249 if (info()->follows_word_interest || 2943 if (info()->follows_word_interest ||
2250 info()->follows_newline_interest || 2944 info()->follows_newline_interest ||
2251 info()->follows_start_interest) { 2945 info()->follows_start_interest) {
2252 return false; 2946 return false;
2253 } 2947 }
2254 if (info()->at_end) { 2948 if (info()->at_end) {
2255 Label at_end; 2949 Label at_end;
2256 // Load current character jumps to the label if we are beyond the string 2950 // Load current character jumps to the label if we are beyond the string
2257 // end. 2951 // end.
2258 macro->LoadCurrentCharacter(0, &at_end); 2952 assembler->LoadCurrentCharacter(0, &at_end);
2259 macro->GoTo(variant->backtrack()); 2953 assembler->GoTo(variant->backtrack());
2260 macro->Bind(&at_end); 2954 assembler->Bind(&at_end);
2261 } 2955 }
2262 macro->ReadCurrentPositionFromRegister( 2956 assembler->ReadCurrentPositionFromRegister(
2263 data_.u_submatch.current_position_register); 2957 data_.u_submatch.current_position_register);
2264 macro->ReadStackPointerFromRegister( 2958 assembler->ReadStackPointerFromRegister(
2265 data_.u_submatch.stack_pointer_register); 2959 data_.u_submatch.stack_pointer_register);
2266 return on_success()->Emit(compiler, variant); 2960 return on_success()->Emit(compiler, variant);
2267 default: 2961 default:
2268 UNREACHABLE(); 2962 UNREACHABLE();
2269 return false; 2963 return false;
2270 } 2964 }
2271 } 2965 }
2272 2966
2273 2967
2274 bool BackReferenceNode::Emit(RegExpCompiler* compiler, 2968 bool BackReferenceNode::Emit(RegExpCompiler* compiler,
2275 GenerationVariant* variant) { 2969 GenerationVariant* variant) {
2276 RegExpMacroAssembler* macro = compiler->macro_assembler(); 2970 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2277 if (!variant->is_trivial()) { 2971 if (!variant->is_trivial()) {
2278 return variant->Flush(compiler, this); 2972 return variant->Flush(compiler, this);
2279 } 2973 }
2280 2974
2281 LimitResult limit_result = LimitVersions(compiler, variant); 2975 LimitResult limit_result = LimitVersions(compiler, variant);
2282 if (limit_result == DONE) return true; 2976 if (limit_result == DONE) return true;
2283 if (limit_result == FAIL) return false; 2977 if (limit_result == FAIL) return false;
2284 ASSERT(limit_result == CONTINUE); 2978 ASSERT(limit_result == CONTINUE);
2285 2979
2286 RecursionCheck rc(compiler); 2980 RecursionCheck rc(compiler);
2287 2981
2288 ASSERT_EQ(start_reg_ + 1, end_reg_); 2982 ASSERT_EQ(start_reg_ + 1, end_reg_);
2289 if (info()->at_end) { 2983 if (info()->at_end) {
2290 // If we are constrained to match at the end of the input then succeed 2984 // If we are constrained to match at the end of the input then succeed
2291 // iff the back reference is empty. 2985 // iff the back reference is empty.
2292 macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack()); 2986 assembler->CheckNotRegistersEqual(start_reg_,
2987 end_reg_,
2988 variant->backtrack());
2293 } else { 2989 } else {
2294 if (compiler->ignore_case()) { 2990 if (compiler->ignore_case()) {
2295 macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack()); 2991 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
2992 variant->backtrack());
2296 } else { 2993 } else {
2297 macro->CheckNotBackReference(start_reg_, variant->backtrack()); 2994 assembler->CheckNotBackReference(start_reg_, variant->backtrack());
2298 } 2995 }
2299 } 2996 }
2300 return on_success()->Emit(compiler, variant); 2997 return on_success()->Emit(compiler, variant);
2301 } 2998 }
2302 2999
2303 3000
2304 // ------------------------------------------------------------------- 3001 // -------------------------------------------------------------------
2305 // Dot/dotty output 3002 // Dot/dotty output
2306 3003
2307 3004
(...skipping 1281 matching lines...) Expand 10 before | Expand all | Expand 10 after
3589 EmbeddedVector<byte, 1024> codes; 4286 EmbeddedVector<byte, 1024> codes;
3590 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4287 RegExpMacroAssemblerIrregexp macro_assembler(codes);
3591 return compiler.Assemble(&macro_assembler, 4288 return compiler.Assemble(&macro_assembler,
3592 node, 4289 node,
3593 data->capture_count, 4290 data->capture_count,
3594 pattern); 4291 pattern);
3595 } 4292 }
3596 4293
3597 4294
3598 }} // namespace v8::internal 4295 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/regexp-macro-assembler.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698